The code below prepares the analysis environment and sets up all required libraries and functions for further analysis.
# Report-wide options: suppress warnings in the rendered output and force
# UTF-8 text handling.
# NOTE: warn = -1 silences *all* warnings globally; prefer warn = 0 while
# developing so real problems are not hidden.
options(warn = -1)
options(encoding = "UTF-8")
# Clear workspace so the report always starts from a clean slate.
# (Generally discouraged in scripts; kept here for reproducible knitting.)
rm(list = ls())
# Set language to En
Sys.setlocale(category = "LC_ALL", locale = "english")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
# Packages required by the analysis below.
libraries <- c("bib2df",
               "dplyr",
               "ggplot2",
               "gridExtra",
               "grid",
               "kableExtra",
               "readxl",
               "tidyr",
               "treemapify",
               "scales",
               "cowplot",
               "colorspace")
# Install anything missing, then attach everything.
# Fixes: installed.packages() is queried once instead of twice, and the
# unsafe T alias is replaced by TRUE (T is reassignable).
missing_libraries <- setdiff(libraries, rownames(installed.packages()))
if (length(missing_libraries) > 0) {
  install.packages(missing_libraries, dependencies = TRUE)
}
# sapply() is kept (rather than invisible lapply) so the attached search
# path is echoed into the knitted report, as in the recorded output below.
sapply(libraries, function(libName) {
  library(libName, character.only = TRUE)
})
## $bib2df
## [1] "bib2df" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## $dplyr
## [1] "dplyr" "bib2df" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
##
## $ggplot2
## [1] "ggplot2" "dplyr" "bib2df" "stats" "graphics" "grDevices"
## [7] "utils" "datasets" "methods" "base"
##
## $gridExtra
## [1] "gridExtra" "ggplot2" "dplyr" "bib2df" "stats" "graphics"
## [7] "grDevices" "utils" "datasets" "methods" "base"
##
## $grid
## [1] "grid" "gridExtra" "ggplot2" "dplyr" "bib2df" "stats"
## [7] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## $kableExtra
## [1] "kableExtra" "grid" "gridExtra" "ggplot2" "dplyr"
## [6] "bib2df" "stats" "graphics" "grDevices" "utils"
## [11] "datasets" "methods" "base"
##
## $readxl
## [1] "readxl" "kableExtra" "grid" "gridExtra" "ggplot2"
## [6] "dplyr" "bib2df" "stats" "graphics" "grDevices"
## [11] "utils" "datasets" "methods" "base"
##
## $tidyr
## [1] "tidyr" "readxl" "kableExtra" "grid" "gridExtra"
## [6] "ggplot2" "dplyr" "bib2df" "stats" "graphics"
## [11] "grDevices" "utils" "datasets" "methods" "base"
##
## $treemapify
## [1] "treemapify" "tidyr" "readxl" "kableExtra" "grid"
## [6] "gridExtra" "ggplot2" "dplyr" "bib2df" "stats"
## [11] "graphics" "grDevices" "utils" "datasets" "methods"
## [16] "base"
##
## $scales
## [1] "scales" "treemapify" "tidyr" "readxl" "kableExtra"
## [6] "grid" "gridExtra" "ggplot2" "dplyr" "bib2df"
## [11] "stats" "graphics" "grDevices" "utils" "datasets"
## [16] "methods" "base"
##
## $cowplot
## [1] "cowplot" "scales" "treemapify" "tidyr" "readxl"
## [6] "kableExtra" "grid" "gridExtra" "ggplot2" "dplyr"
## [11] "bib2df" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## $colorspace
## [1] "colorspace" "cowplot" "scales" "treemapify" "tidyr"
## [6] "readxl" "kableExtra" "grid" "gridExtra" "ggplot2"
## [11] "dplyr" "bib2df" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
# Information about session
sessionInfo()
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
## system code page: 65001
##
## time zone: Europe/Warsaw
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] colorspace_2.1-1 cowplot_1.1.3 scales_1.3.0 treemapify_2.5.6
## [5] tidyr_1.3.1 readxl_1.4.3 kableExtra_1.4.0 gridExtra_2.3
## [9] ggplot2_3.5.1 dplyr_1.1.4 bib2df_1.1.2.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3 xml2_1.3.6
## [5] stringi_1.8.4 digest_0.6.37 magrittr_2.0.3 evaluate_1.0.1
## [9] bookdown_0.41 fastmap_1.2.0 cellranger_1.1.0 jsonlite_1.8.9
## [13] httr_1.4.7 purrr_1.0.2 fansi_1.0.6 viridisLite_0.4.2
## [17] jquerylib_0.1.4 cli_3.6.3 rlang_1.1.4 munsell_0.5.1
## [21] withr_3.0.2 cachem_1.1.0 yaml_2.3.10 tools_4.3.2
## [25] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4 stringr_1.5.1
## [29] pkgconfig_2.0.3 pillar_1.9.0 bslib_0.8.0 gtable_0.3.6
## [33] glue_1.8.0 Rcpp_1.0.13-1 ggfittext_0.10.2 systemfonts_1.1.0
## [37] xfun_0.49 tibble_3.2.1 tidyselect_1.2.1 rstudioapi_0.17.1
## [41] knitr_1.49 htmltools_0.5.8.1 rmarkdown_2.29 svglite_2.1.3
## [45] humaniformat_0.6.0 compiler_4.3.2
# Function to clean and read BibTeX file
# Read a BibTeX file, strip non-printable characters, and parse it into a
# data frame via bib2df.
#
# bib_file: path to the .bib file.
# Returns the parsed bibliography data frame, or NULL if parsing fails
# (a message with the parser error is emitted in that case).
load_bibliography <- function(bib_file) {
  # Read and clean the BibTeX file (warn = FALSE suppresses the
  # incomplete-final-line warning)
  lines <- base::readLines(bib_file, warn = FALSE)
  lines <- base::gsub("[^[:print:]]", "", lines) # Remove non-printable characters
  # Write cleaned lines to a temporary file; fix: ensure the temp file is
  # deleted again even if parsing fails (it used to leak).
  temp_file <- base::tempfile(fileext = ".bib")
  base::on.exit(base::unlink(temp_file), add = TRUE)
  base::writeLines(lines, temp_file)
  # Parse the cleaned BibTeX file, reporting (not raising) any error
  bib_df <- tryCatch(
    bib2df::bib2df(temp_file),
    error = function(e) {
      base::message("Error reading BibTeX file: ", e$message)
      NULL
    }
  )
  # NULL propagates naturally; the old is.null() double-return was redundant
  bib_df
}
environment(load_bibliography) <- new.env(parent = baseenv())
# Normalise a bib2df data frame for downstream summaries: lower-cased entry
# type, numeric year and journal columns; entries without a year are dropped.
#
# bib_df: data frame from load_bibliography() with CATEGORY, YEAR and
#   JOURNAL columns.
# Returns bib_df extended with entry_type, year and journal columns.
create_bib_summary <- function(bib_df) {
  # The pipe is aliased explicitly because this function's environment is
  # isolated (parent = baseenv()), so the attached search path is not seen.
  # Fix: the former library(dplyr) call was removed — it mutated the global
  # search path as a side effect and was unnecessary since every dplyr call
  # here is namespace-qualified.
  `%>%` <- dplyr::`%>%`
  # Extract required columns (ENTRYTYPE and YEAR)
  bib_summary <- bib_df %>%
    dplyr::mutate(
      entry_type = base::tolower(CATEGORY),
      # Get the entry type
      year = as.numeric(YEAR),
      # Ensure the year is numeric
      journal = JOURNAL
    ) %>%
    dplyr::filter(!is.na(year)) # Remove entries with missing year
  return(bib_summary)
}
environment(create_bib_summary) <- new.env(parent = baseenv())
# Plot the number of publications per year (bar chart) next to the
# distribution of publication types (labelled pie chart) and return the
# combined grob produced by grid.arrange().
#
# bib_summary: data frame from create_bib_summary() with entry_type and
#   year columns.
# Returns the combined plot grob, or NULL when there is nothing to plot.
plot_publication_distribution <- function(bib_summary) {
  # Fix: ggsci and ggrepel are NOT in the bootstrap `libraries` vector, so
  # fail early with a clear message instead of an opaque namespace error.
  if (!requireNamespace("ggsci", quietly = TRUE) ||
      !requireNamespace("ggrepel", quietly = TRUE)) {
    stop("Packages 'ggsci' and 'ggrepel' are needed for this function. Please install them.")
  }
  # Import pipe manually (function environment is baseenv())
  `%>%` <- magrittr::`%>%`
  # Check for valid data
  if (is.null(bib_summary) || nrow(bib_summary) == 0) {
    base::message("No valid data to plot.")
    return(NULL)
  }
  # Build the per-type distribution with display labels "type\nN (p%)"
  type_distribution <- bib_summary %>%
    dplyr::count(entry_type) %>%
    dplyr::mutate(
      prop = n / base::sum(n) * 100,
      label = base::paste0(entry_type, "\n", n, " (", base::round(prop, 1), "%)"),
      entry_type = base::as.character(entry_type)
    )
  # Consistent colour palette, one colour per entry type, keyed by type
  global_palette <- stats::setNames(
    grDevices::colorRampPalette(ggsci::pal_jco("default")(3))(nrow(type_distribution)),
    type_distribution$entry_type
  )
  base_color <- global_palette[1]
  # Bar chart of total publications per year with a uniform fill
  trend_plot <- ggplot2::ggplot(bib_summary, ggplot2::aes(x = year)) +
    ggplot2::geom_bar(fill = base_color, color = "white") +
    ggplot2::labs(title = "", x = "Publication Year", y = "Number of Publications") +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      plot.title = ggplot2::element_text(
        hjust = 0.5,
        size = 14,
        face = "plain"
      )
    )
  # Pie chart with the same palette; repelled labels avoid overlap in
  # thin slices
  pie_chart <- ggplot2::ggplot(type_distribution,
                               ggplot2::aes(x = "", y = prop, fill = entry_type)) +
    ggplot2::geom_col(width = 1, color = "white") +
    ggplot2::coord_polar(theta = "y") +
    ggplot2::scale_fill_manual(values = global_palette, drop = TRUE, name = "Group") +
    ggplot2::theme_void() +
    ggrepel::geom_label_repel(
      ggplot2::aes(label = label),
      position = ggplot2::position_stack(vjust = 0.5),
      size = 4,
      show.legend = FALSE,
      segment.color = "grey50"
    ) +
    ggplot2::theme(legend.position = "bottom")
  # Empty title grob kept as a spacer row above the two charts
  title_grob <- grid::textGrob(
    "",
    gp = grid::gpar(fontsize = 14, fontface = "plain")
  )
  # Combine: spacer on top, bar + pie side by side below
  combined_plot <- gridExtra::grid.arrange(
    title_grob,
    gridExtra::arrangeGrob(trend_plot, pie_chart, ncol = 2),
    ncol = 1,
    heights = c(0.1, 1)
  )
  return(combined_plot)
}
# Isolate environment to avoid masking
environment(plot_publication_distribution) <- baseenv()
# Count articles per (entry_type, journal) pair, most frequent first.
#
# bib_summary: data frame from create_bib_summary().
# Returns an ungrouped data frame with entry_type, journal and
#   article_count columns, sorted by descending article_count.
aggregate_by_journal <- function(bib_summary) {
  # Fix: library(dplyr) removed (global side effect, unnecessary); desc is
  # now namespace-qualified so it no longer relies on dplyr being attached.
  `%>%` <- dplyr::`%>%`
  journal_aggregation <- bib_summary %>%
    dplyr::group_by(entry_type, journal) %>%
    dplyr::summarise(article_count = dplyr::n(), .groups = 'drop') %>%
    dplyr::arrange(dplyr::desc(article_count))
  return(journal_aggregation)
}
environment(aggregate_by_journal) <- new.env(parent = baseenv())
# Draw a labelled pie chart of summary_df with a shared colour palette so it
# matches the other charts in the report.
#
# summary_df: data frame with percentage, group and label columns
#   (see create_summary_df()).
# title, subtitle: plot annotations (empty by default).
# global_palette: named colour vector keyed by group values.
# Returns the ggplot object.
create_pie_chart <- function(summary_df,
                             title = "",
                             subtitle = "",
                             global_palette) {
  # Fix: ggrepel is not in the bootstrap `libraries` vector, so check for it
  # explicitly (mirrors the treemapify check in create_treemap_chart).
  if (!requireNamespace("ggrepel", quietly = TRUE)) {
    stop("Package 'ggrepel' is needed for this function. Please install it.")
  }
  # Pie chart with the same palette
  pie_chart <- ggplot2::ggplot(summary_df, ggplot2::aes(x = "", y = percentage, fill = group)) +
    ggplot2::geom_col(width = 1, color = "white") +
    ggplot2::coord_polar(theta = "y") +
    ggplot2::scale_fill_manual(
      values = global_palette,
      drop = TRUE,
      guide = ggplot2::guide_legend(title = "Group")
    ) +
    ggplot2::labs(title = title, subtitle = subtitle) +
    ggplot2::theme_void() +
    # Repelled labels avoid overlapping text in thin slices
    ggrepel::geom_label_repel(
      ggplot2::aes(label = label),
      position = ggplot2::position_stack(vjust = 0.5),
      size = 4,
      show.legend = FALSE,
      segment.color = "grey50"
    ) +
    ggplot2::theme(
      plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "plain"),
      plot.subtitle = ggplot2::element_text(hjust = 0.5),
      legend.position = "right",
      legend.direction = "vertical",
      legend.title = ggplot2::element_text(face = "plain"),
      legend.text = ggplot2::element_text(size = 10)
    )
  return(pie_chart)
}
environment(create_pie_chart) <- new.env(parent = baseenv())
# Draw a treemap of summary_df, with tile area proportional to a value
# column and tiles coloured by group.
#
# summary_df: data frame holding at least the columns named by value_var,
#   label_var and group_var (defaults match create_summary_df() output).
# title, subtitle: plot annotations (empty by default).
# global_palette: named colour vector keyed by the group values.
# text_size, text_color: styling of the in-tile labels.
# show_values: when TRUE tiles are labelled with label_var, otherwise with
#   group_var.
# value_var, label_var, group_var: column names, passed as strings.
# sort_by / sort_desc: column used to order tiles before plotting; when
#   sort_by is not a column of summary_df the sort is silently skipped.
# Returns the ggplot object.
create_treemap_chart <- function(summary_df,
                                 title = "",
                                 subtitle = "",
                                 global_palette,
                                 text_size = 11,
                                 text_color = "white",
                                 show_values = TRUE,
                                 value_var = "count",
                                 label_var = "label",
                                 group_var = "group",
                                 sort_by = "count",
                                 sort_desc = TRUE) {
  # Require necessary packages (treemapify is in the bootstrap list, but
  # guard anyway for standalone use)
  if (!requireNamespace("treemapify", quietly = TRUE)) {
    stop("Package 'treemapify' is needed for this function. Please install it.")
  }
  # Sort the data based on the specified column
  if (sort_by %in% names(summary_df)) {
    summary_df <- summary_df[order(summary_df[[sort_by]], decreasing = sort_desc), ]
    # Convert group to a factor whose levels follow the sorted order, so the
    # fill legend keeps the same ordering as the tiles
    summary_df[[group_var]] <- factor(summary_df[[group_var]], levels = unique(summary_df[[group_var]]))
  }
  # Create the treemap; .data[[...]] resolves the string column names inside
  # the tidy-eval data mask
  treemap_chart <- ggplot2::ggplot(summary_df,
                                   ggplot2::aes(
                                     area = .data[[value_var]],
                                     fill = .data[[group_var]],
                                     label = if (show_values)
                                       .data[[label_var]]
                                     else
                                       .data[[group_var]]
                                   )) +
    treemapify::geom_treemap() +
    treemapify::geom_treemap_text(
      colour = text_color,
      place = "centre",
      size = text_size,
      fontface = "bold"
    ) +
    ggplot2::scale_fill_manual(
      values = global_palette,
      drop = TRUE,
      guide = ggplot2::guide_legend(title = "Group")
    ) +
    ggplot2::labs(title = title, subtitle = subtitle) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      legend.position = "right",
      legend.direction = "vertical",
      legend.title = ggplot2::element_text(face = "plain"),
      legend.text = ggplot2::element_text(size = 10),
      plot.title = ggplot2::element_text(
        size = 14,
        face = "plain",
        hjust = 0.5
      ),
      plot.subtitle = ggplot2::element_text(size = 12, hjust = 0.5)
    )
  return(treemap_chart)
}
environment(create_treemap_chart) <- new.env(parent = baseenv())
# Draw a ChartExpo-style radial bar chart: one bar per category arranged
# around a circle, with end-point markers, value labels just beyond each
# bar, and optional category labels on the outer rim.
#
# data_df: data frame holding the columns named by category_var and
#   value_var (and label_var, if given).
# category_var, value_var, label_var: column names, passed as strings.
# title, subtitle: plot annotations (empty by default).
# color_palette: colours recycled across categories when too short.
# max_value: radial axis maximum; defaults to 110% of the largest value.
# show_labels / show_legend: toggle rim labels and the fill legend.
# Returns the ggplot object.
create_chartexpo_radial <- function(data_df,
                                    category_var,
                                    value_var,
                                    label_var = NULL,
                                    title = "",
                                    subtitle = "",
                                    color_palette = c("#4472C4",
                                                      "#ED7D31",
                                                      "#A5A5A5",
                                                      "#FFC000",
                                                      "#5B9BD5",
                                                      "#70AD47"),
                                    max_value = NULL,
                                    show_labels = TRUE,
                                    show_legend = TRUE) {
  # Ensure the data is properly sorted for radial display
  data_df <- data_df[order(data_df[[category_var]]), ]
  # Set maximum value for consistent scaling
  if (is.null(max_value)) {
    max_value <- max(data_df[[value_var]]) * 1.1 # Add 10% buffer
  }
  # Spread categories evenly around the circle (angle in radians)
  # NOTE(review): 1:n_categories misbehaves when data_df has zero rows
  # (yields c(1, 0)); callers are expected to pass non-empty data.
  n_categories <- nrow(data_df)
  data_df$id <- 1:n_categories
  data_df$angle <- 2 * pi * (data_df$id - 1) / n_categories
  # Assign colors, recycling the palette if there are more categories
  if (length(color_palette) < n_categories) {
    color_palette <- rep_len(color_palette, n_categories)
  }
  data_df$color <- color_palette[1:n_categories]
  # Create the base plot
  radial_plot <- ggplot2::ggplot(data_df) +
    # Background grid: hlines render as concentric circles once
    # coord_polar() is applied below
    ggplot2::geom_hline(
      yintercept = seq(0, max_value, length.out = 5),
      color = "gray90",
      size = 0.5
    ) +
    # Background grid: vlines render as radial spokes under coord_polar()
    ggplot2::geom_vline(
      xintercept = seq(0, 2 * pi, length.out = n_categories + 1)[1:n_categories],
      color = "gray90",
      size = 0.5
    ) +
    # Add the radial bars
    ggplot2::geom_col(
      ggplot2::aes(x = angle, y = .data[[value_var]], fill = .data[[category_var]]),
      width = 2 * pi / (n_categories * 1.5),
      alpha = 0.85
    ) +
    # Add points at the end of each bar
    ggplot2::geom_point(ggplot2::aes(x = angle, y = .data[[value_var]], color = .data[[category_var]]), size = 3) +
    # Value labels positioned 15% beyond the end of each bar
    ggplot2::geom_text(
      ggplot2::aes(
        x = angle,
        y = .data[[value_var]] * 1.15,
        # Fall back to the raw value when no label column was given
        # NOTE(review): the trailing comma inside aes() below is tolerated
        # by ggplot2 (empty arguments are ignored)
        label = if (!is.null(label_var))
          .data[[label_var]]
        else
          .data[[value_var]],
      ),
      hjust = 0.5,
      vjust = 0.5,
      size = 4,
      # Increased font size
      fontface = "bold",
      color = "black"
    ) +
    # Category labels on the outer rim, only when requested
    {
      if (show_labels)
        ggplot2::geom_text(
          ggplot2::aes(
            x = angle,
            y = max_value * 1.3,
            # Moved further out for more space
            label = .data[[category_var]]
          ),
          hjust = 0.5,
          vjust = 0.5,
          size = 4.5,
          # Increased font size
          fontface = "bold"
        )
    } +
    # Configure the polar coordinates
    ggplot2::coord_polar() +
    # Set custom colors
    ggplot2::scale_fill_manual(values = color_palette) +
    ggplot2::scale_color_manual(values = color_palette) +
    # Set limits - increased to accommodate the labels
    ggplot2::ylim(0, max_value * 1.4) +
    # Add titles
    ggplot2::labs(title = title, subtitle = subtitle) +
    # Theme customization to match ChartExpo style
    ggplot2::theme_minimal() +
    ggplot2::theme(
      axis.text = ggplot2::element_blank(),
      axis.title = ggplot2::element_blank(),
      axis.ticks = ggplot2::element_blank(),
      panel.grid = ggplot2::element_blank(),
      plot.title = ggplot2::element_text(
        size = 14,
        face = "plain",
        hjust = 0.5
      ),
      plot.subtitle = ggplot2::element_text(size = 12, hjust = 0.5),
      legend.position = if (show_legend)
        "bottom"
      else
        "none",
      legend.title = ggplot2::element_blank(),
      plot.margin = ggplot2::unit(c(1, 1, 1, 1), "cm")
    )
  return(radial_plot)
}
environment(create_chartexpo_radial) <- new.env(parent = baseenv())
# Draw a year-by-group bubble chart where bubble area encodes the count.
#
# summary_df: data frame with YEAR, group and n columns
#   (see create_bubble_df()).
# title, subtitle: accepted for interface parity with the other chart
#   helpers (not used by this plot).
# global_palette: named colour vector keyed by group values.
# Returns the ggplot object.
create_bubble_plot <- function(summary_df,
                               title = "",
                               subtitle = "",
                               global_palette) {
  # Base mapping: one bubble per (year, group), area sized by count
  p <- ggplot2::ggplot(summary_df,
                       ggplot2::aes(
                         x = YEAR,
                         y = group,
                         size = n,
                         fill = group
                       ))
  p <- p + ggplot2::geom_point(shape = 21,
                               color = "white",
                               alpha = 0.8)
  # Explicit size legend whose glyphs match the plotted bubbles
  size_guide <- ggplot2::guide_legend(
    title = "Count",
    override.aes = base::list(
      shape = 21,
      fill = "grey70",
      color = "white",
      alpha = 0.8
    )
  )
  p <- p + ggplot2::scale_size_area(max_size = 15, guide = size_guide)
  p <- p + ggplot2::scale_fill_manual(values = global_palette,
                                      guide = ggplot2::guide_legend(title = "Group"))
  p <- p + ggplot2::labs(x = "Year", y = NULL)
  p <- p + ggplot2::theme_minimal()
  p <- p + ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
    panel.grid.major.y = ggplot2::element_blank(),
    legend.position = "right",
    legend.direction = "vertical",
    legend.title = ggplot2::element_text(
      face = "plain",
      hjust = 0.5
    ),
    legend.text = ggplot2::element_text(size = 10)
  )
  return(p)
}
environment(create_bubble_plot) <- new.env(parent = baseenv())
# Summarise one column of df into counts, percentages and display labels.
# The first two output columns are renamed to "group" and "count" so the
# result plugs directly into the generic chart helpers.
#
# df: input data frame.
# column_name: name (string) of the column to tabulate.
# Returns a data frame with group, count, percentage, label and column
#   (the source column name, kept for faceting).
create_summary_df <- function(df, column_name) {
  # Fix: library(dplyr) removed — it mutated the global search path as a
  # side effect and was unnecessary since every call is namespace-qualified.
  `%>%` <- dplyr::`%>%`
  df %>%
    dplyr::count(!!dplyr::sym(column_name)) %>%
    dplyr::mutate(
      percentage = base::round(n / base::sum(n) * 100, 1),
      label = base::paste0(n, " (", percentage, "%)"),
      column = column_name # Add column name for faceting
    ) %>% dplyr::rename_with( ~ "group", 1) %>% dplyr::rename_with( ~ "count", 2)
}
environment(create_summary_df) <- new.env(parent = baseenv())
# Build a (group, YEAR, n) summary suitable for create_bubble_plot().
#
# year_df: data frame with at least bibtexkey and YEAR columns.
# references_categories: named list; each element is one or more
#   comma-separated bibtex keys belonging to that category.
# Returns a data frame with group, YEAR (numeric) and n, sorted by group
#   then YEAR. Stops if any key has no matching YEAR in year_df.
create_bubble_df <- function(year_df, references_categories) {
  # One small data frame per category, combined once at the end.
  # Fix: the original grew a data frame with rbind() inside a loop
  # (quadratic copying).
  per_category <- base::lapply(base::names(references_categories), function(category) {
    # Split in case there are multiple keys in one string (comma separated)
    keys <- base::unlist(base::strsplit(references_categories[[category]], ",\\s*"))
    base::data.frame(bibtexkey = keys,
                     group = category,
                     stringsAsFactors = FALSE)
  })
  if (base::length(per_category) == 0) {
    # No categories at all: keep the original empty-frame shape
    bubble_df <- base::data.frame(bibtexkey = base::character(),
                                  group = base::character(),
                                  stringsAsFactors = FALSE)
  } else {
    bubble_df <- base::do.call(base::rbind, per_category)
  }
  # Merge with the original dataframe to get the YEAR for every key
  bubble_df <- base::merge(bubble_df, year_df[, c("bibtexkey", "YEAR")],
                           by = "bibtexkey", all.x = TRUE)
  # A missing YEAR means a key was not found in year_df; fail loudly
  if (base::any(base::is.na(bubble_df$YEAR))) {
    stop("Error: NA values found in the 'YEAR' column.")
  }
  # Count bibtexkeys by group and year
  summary_df <- stats::aggregate(bibtexkey ~ group + YEAR, data = bubble_df, FUN = length)
  base::names(summary_df)[3] <- "n"
  # (The former post-aggregation NA filter was dead code: the stop() above
  # already guarantees YEAR contains no NAs.)
  # Sort summary_df by group and YEAR
  summary_df <- summary_df[base::order(summary_df$group, summary_df$YEAR), ]
  # Make sure YEAR is numeric
  summary_df$YEAR <- base::as.numeric(summary_df$YEAR)
  return(summary_df)
}
environment(create_bubble_df) <- new.env(parent = baseenv())
The code below loads the bibliography and the accompanying article-summary spreadsheet.
# Path to the BibTeX export analysed in this report
bib_file <- "bibtex-information-fusion-document-classification.bib"
# Parse it with the cleaning wrapper defined above; NULL on parse failure
bib_df <- load_bibliography(bib_file)
# Quick visual check of the parsed entries
head(bib_df)
## # A tibble: 6 x 39
## CATEGORY BIBTEXKEY ADDRESS ANNOTE AUTHOR BOOKTITLE CHAPTER CROSSREF EDITION
## <chr> <chr> <chr> <chr> <list> <chr> <chr> <chr> <chr>
## 1 ARTICLE wangq2022 <NA> <NA> <chr> <NA> <NA> <NA> <NA>
## 2 ARTICLE zhao2023 <NA> <NA> <chr> <NA> <NA> <NA> <NA>
## 3 ARTICLE cgoncalves2~ <NA> <NA> <chr> <NA> <NA> <NA> <NA>
## 4 ARTICLE reil2023 <NA> <NA> <chr> <NA> <NA> <NA> <NA>
## 5 ARTICLE debreuij2020 <NA> <NA> <chr> <NA> <NA> <NA> <NA>
## 6 ARTICLE liuw2021 <NA> <NA> <chr> <NA> <NA> <NA> <NA>
## # i 30 more variables: EDITOR <list>, HOWPUBLISHED <chr>, INSTITUTION <chr>,
## # JOURNAL <chr>, KEY <chr>, MONTH <chr>, NOTE <chr>, NUMBER <chr>,
## # ORGANIZATION <chr>, PAGES <chr>, PUBLISHER <chr>, SCHOOL <chr>,
## # SERIES <chr>, TITLE <chr>, TYPE <chr>, VOLUME <chr>, YEAR <dbl>,
## # ISSN <chr>, ABSTRACT <chr>, DOI <chr>, KEYWORDS <chr>, URL <chr>,
## # ISSUE <chr>, ISBN <chr>, COLLECTION <chr>, VENUE <chr>, PMID <chr>,
## # ARCHIVEPREFIX <chr>, EPRINT <chr>, CITY <chr>
# Curated article summaries (one row per reviewed article)
excel_file <- "2-articles-information-fusion-summarisation-cleaned.xlsx"
excel_df <- readxl::read_excel(excel_file, sheet = 1)
# Drop completely empty rows; the unqualified %>% and filter() rely on
# dplyr having been attached in the setup chunk
excel_df <- excel_df %>%
  filter(rowSums(!is.na(.)) > 0)
head(excel_df)
## # A tibble: 6 x 21
## l.p doi bibtexkey `Article title` `Short note` Please summarize or ~1
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 10.1016/j~ dacosta2~ "Providing a g~ "Multi-moda~ "The article \"Provid~
## 2 2 10.1007/9~ gallo2021 "Visual Word E~ "The study ~ "The paper \"Visual W~
## 3 3 10.1109/A~ gui2021 "Technology Fo~ "The study ~ "The paper \"Technolo~
## 4 4 10.18653/~ huc2021 "One-class Tex~ "The study ~ "The paper \"One-clas~
## 5 5 10.1145/3~ garg2021 "On-Device Doc~ "The study ~ "The paper \"On-Devic~
## 6 6 10.18653/~ ma2021 "On the (in)ef~ "The study ~ "The paper \"On the (~
## # i abbreviated name:
## # 1: `Please summarize or provide the most important details of the work. Use a maximum of five sentences.`
## # i 15 more variables: `What are the findings?` <chr>,
## # `What are the challenges?` <chr>,
## # `Identified the datasets used in the article` <chr>,
## # `Disambiguated datasets names` <chr>,
## # `What other models were selected for comparison?` <chr>, ...
# Sanity check: the Excel sheet and the BibTeX file must cover exactly the
# same sorted set of citation keys (expected output: all TRUE)
table(unique(sort(excel_df$bibtexkey)) == unique(sort(bib_df$BIBTEXKEY)))
##
## TRUE
## 139
# Normalise the bibliography and draw the year/type overview figure
bib_summary <- create_bib_summary(bib_df)
plotPubDist <- plot_publication_distribution(bib_summary)
# Persist the grob returned by grid.arrange() inside the plotting helper
ggsave(
  "fig-year-and-publication-type-pie-bar.pdf",
  plot = plotPubDist,
  width = 14,
  height = 8
)
# Tabulate article counts per (entry type, journal) and render as a
# styled table in the report
journal_aggregation <- aggregate_by_journal(bib_summary)
knitr::kable(journal_aggregation, caption = "Publication type") %>% kableExtra::kable_styling()
| entry_type | journal | article_count |
|---|---|---|
| inbook | Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) | 12 |
| article | Neurocomputing | 5 |
| article | Pattern Recognition | 5 |
| article | Applied Sciences | 4 |
| article | Expert Systems with Applications | 4 |
| article | IEEE Access | 3 |
| article | Information | 3 |
| inproceedings | CEUR Workshop Proceedings | 3 |
| article | IEEE Transactions on Pattern Analysis and Machine Intelligence | 2 |
| article | Information Fusion | 2 |
| article | Information Sciences | 2 |
| article | International Journal on Document Analysis and Recognition (IJDAR) | 2 |
| article | Journal of Machine Learning Research | 2 |
| article | Multimedia Tools and Applications | 2 |
| inbook | NA | 2 |
| inproceedings | Findings of the Association for Computational Linguistics: EMNLP 2022 | 2 |
| inproceedings | Proceedings - International Conference on Pattern Recognition | 2 |
| article | ACM Transactions on Asian and Low-Resource Language Information Processing | 1 |
| article | ACM Transactions on Knowledge Discovery from Data | 1 |
| article | Applied Intelligence | 1 |
| article | Artificial Intelligence Review | 1 |
| article | CEUR Workshop Proceedings | 1 |
| article | Computación y Sistemas | 1 |
| article | Computer Standards & Interfaces | 1 |
| article | Computers & Geosciences | 1 |
| article | Computers & Mathematics with Applications | 1 |
| article | Computers, Materials & Continua | 1 |
| article | Digital Communications and Networks | 1 |
| article | Electronics | 1 |
| article | Expert Systems | 1 |
| article | Future Generation Computer Systems | 1 |
| article | IAENG International Journal of Computer Science | 1 |
| article | ICIC Express Letters | 1 |
| article | IEEE MultiMedia | 1 |
| article | IEEE Transactions on Engineering Management | 1 |
| article | IEEE Transactions on Knowledge and Data Engineering | 1 |
| article | IEEE Transactions on Neural Networks and Learning Systems | 1 |
| article | International Journal of Computational Intelligence Systems | 1 |
| article | International Journal of Information Technology | 1 |
| article | International Journal of Intelligent Engineering and Systems | 1 |
| article | International Journal on Artificial Intelligence Tools | 1 |
| article | Journal of Ambient Intelligence and Humanized Computing | 1 |
| article | Journal of Artificial Intelligence Research | 1 |
| article | Journal of Biomedical Informatics | 1 |
| article | Knowledge-Based Systems | 1 |
| article | Machine Learning | 1 |
| article | Multimedia Systems | 1 |
| article | Neural Networks | 1 |
| article | Pattern Recognition and Image Analysis | 1 |
| article | Procedia Computer Science | 1 |
| article | Proceedings of the AAAI Conference on Artificial Intelligence | 1 |
| article | Procesamiento del Lenguaje Natural | 1 |
| article | Signal, Image and Video Processing | 1 |
| article | Soft Computing | 1 |
| article | Swarm and Evolutionary Computation | 1 |
| inbook | Communications in Computer and Information Science | 1 |
| inbook | Frontiers in Artificial Intelligence and Applications | 1 |
| inbook | Lecture Notes in Artificial Intelligence (Subseries of Lecture Notes in Computer Science) | 1 |
| inproceedings | 2009 IEEE International Workshop on Multimedia Signal Processing, MMSP ’09 | 1 |
| inproceedings | 2010 International Conference on Web Information Systems and Mining | 1 |
| inproceedings | 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL HLT 2016 - Proceedings of the Conference | 1 |
| inproceedings | 2018 IEEE Congress on Evolutionary Computation, CEC 2018 - Proceedings | 1 |
| inproceedings | 2022 10th International Conference on Affective Computing and Intelligent Interaction, ACII 2022 | 1 |
| inproceedings | 50th Annual Meeting of the Association for Computational Linguistics, ACL 2012 - Proceedings of the Conference | 1 |
| inproceedings | 6th International Conference on Fuzzy Systems and Knowledge Discovery, FSKD 2009 | 1 |
| inproceedings | ACM International Conference Proceeding Series | 1 |
| inproceedings | ACM Web Conference 2023 - Proceedings of the World Wide Web Conference, WWW 2023 | 1 |
| inproceedings | ACM-BCB 2016 - 7th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics | 1 |
| inproceedings | Advances in Neural Information Processing Systems | 1 |
| inproceedings | Advances in Neural Information Processing Systems 22 - Proceedings of the 2009 Conference | 1 |
| inproceedings | EACL 2021 - 16th Conference of the European Chapter of the Association for Computational Linguistics, Proceedings of the Conference | 1 |
| inproceedings | ECNLP 2022 - 5th Workshop on e-Commerce and NLP, Proceedings of the Workshop | 1 |
| inproceedings | EMNLP 2020 - 2020 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference | 1 |
| inproceedings | Findings of the Association for Computational Linguistics: EMNLP 2023 | 1 |
| inproceedings | Frontiers in Artificial Intelligence and Applications | 1 |
| inproceedings | ICETC 2010 - 2010 2nd International Conference on Education Technology and Computer | 1 |
| inproceedings | MML’10 - Proceedings of the 3rd ACM International Workshop on Machine Learning and Music, Co-located with ACM Multimedia 2010 | 1 |
| inproceedings | NAACL HLT 2019 - 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies - Proceedings of the Conference | 1 |
| inproceedings | Proceedings - 2009 International Conference on Industrial and Information Systems, IIS 2009 | 1 |
| inproceedings | Proceedings - 2013 IEEE International Conference on Big Data, Big Data 2013 | 1 |
| inproceedings | Proceedings - 2015 IEEE 16th International Conference on Information Reuse and Integration, IRI 2015 | 1 |
| inproceedings | Proceedings - 2018 IEEE International Conference on Cognitive Computing, ICCC 2018 - Part of the 2018 IEEE World Congress on Services | 1 |
| inproceedings | Proceedings - IEEE International Conference on Data Mining Workshops, ICDM Workshops 2008 | 1 |
| inproceedings | Proceedings - The 1st International Conference on Intelligent Networks and Intelligent Systems, ICINIS 2008 | 1 |
| inproceedings | Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume | 1 |
| inproceedings | Proceedings of the 29th International Conference on Machine Learning, ICML 2012 | 1 |
| inproceedings | Proceedings of the 29th USENIX Security Symposium | 1 |
| inproceedings | Proceedings of the 3rd ACM India Joint International Conference on Data Science & Management of Data (8th ACM IKDD CODS & 26th COMAD) | 1 |
| inproceedings | Proceedings of the AAAI Conference on Artificial Intelligence | 1 |
| inproceedings | Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining | 1 |
| inproceedings | Proceedings of the ACM Symposium on Applied Computing | 1 |
| inproceedings | Proceedings of the Annual Meeting of the Association for Computational Linguistics | 1 |
| inproceedings | Proceedings of the IEEE International Conference on Computer Vision | 1 |
| inproceedings | Proceedings of the International Conference on Document Analysis and Recognition, ICDAR | 1 |
| inproceedings | Proceedings of the International Joint Conference on Neural Networks | 1 |
| inproceedings | SIGIR 2010 Proceedings - 33rd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval | 1 |
| inproceedings | SIGIR’12 - Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval | 1 |
| inproceedings | SemEval 2022 - 16th International Workshop on Semantic Evaluation, Proceedings of the Workshop | 1 |
| inproceedings | WCRML 2019 - Proceedings of the ACM Workshop on Crossmodal Learning and Application | 1 |
# Define the vectors of citation keys per fusion paradigm
# Multimodal
# NOTE(review): the identifier below misspells "MULTIMODAL"; it is kept
# as-is because later code references MULTIMOAL_BIBTEXKEY.
# NOTE(review): "jiangs2024" and "anget2018" each appear twice in this
# vector; downstream unique() calls absorb the duplicates, but a raw
# length() of this vector overcounts by two.
MULTIMOAL_BIBTEXKEY <- c(
  "ma2021",
  "xianfang2024",
  "reil2023",
  "fujinumay2023",
  "hessel2020",
  "bakkalis2023",
  "chenz2023",
  "zouh2023",
  "chenl2022",
  "gallo2021",
  "anget2018",
  "rajendran2016",
  "ghorbanali2024",
  "jiangs2024",
  "liub2024",
  "liut2024",
  "ronghaop2024",
  "tengfeil2024",
  "yushili2024",
  "zhangy2024",
  "arlqaraleshs2024",
  "guod2023",
  "jarquin2023",
  "jarrahia2023",
  "kenny2023",
  "liangz2023",
  "linckere2023",
  "luzdearau2023",
  "ortizperez2023",
  "rasheeda2023",
  "shah2023",
  "dacosta2022",
  "dongpin2022",
  "kanchid2022",
  "paraskevopoulos2022",
  "sapeao2022",
  "wangq2022",
  "garg2021",
  "guelorget2021",
  "setiawan2021",
  "wang2021",
  "zingaro2021",
  "braz2020",
  "debreuij2020",
  "zhu2020",
  "jainr2019",
  "ravikiranm2019",
  "matricm2018",
  "tellez2018",
  "schmittm2017",
  "cristanim2014",
  "liparas2014",
  "perezgraciat2010",
  "zhangx2010",
  "chens2009",
  "chos2023",
  "adwaithd2022",
  "wajdm2024",
  "kozienkop2023",
  "carmona2020",
  "argon2018",
  "guptad2018",
  "guq2022",
  "chatziagapia2022",
  "yuet2022",
  "akhiamov2018",
  "akhtiamovo2017",
  "andriyanovn2022",
  "jiangs2024",
  "anget2018"
)
# Multiview
# NOTE(review): "yangp2014" appears twice in this vector; some keys (e.g.
# "carmona2020", "rajendran2016") also appear in the multimodal vector —
# presumably deliberate cross-membership, verify against the review notes.
MULTIVIEW_BIBTEXKEY <- c(
  "brefeldu2015",
  "aminim2009",
  "liy2013",
  "zhangqi2024",
  "doinychko2020",
  "samya2023",
  "sangy2022",
  "jiax2021",
  "liang2021",
  "sus2021",
  "yangp2014",
  "maf2020",
  "max2020",
  "wangh2020",
  "bhatt2019",
  "chens2019",
  "hoylea2019",
  "wangh2019",
  "ferreira2018",
  "zhup2018",
  "zhanz2017",
  "xux2016",
  "perinaa2013",
  "zhangb2013",
  "zhangd2013",
  "guyo2012",
  "kovesim2012",
  "yangp2012",
  "zhengw2011",
  "chenb2009",
  "zhangx2009",
  "zhangb2008",
  "fengz2024",
  "jiz2024",
  "xuy2024",
  "varmanp2023",
  "zhao2023",
  "cgoncalves2022",
  "liuj2022",
  "luox2022",
  "gui2021",
  "huc2021",
  "liuw2021",
  "mmironczuk2020",
  "mmironczuk2019",
  "pengj2018",
  "huz2017",
  "sinorar2016",
  "xuh2016",
  "fakri2015",
  "liuj2014",
  "longg2013",
  "lig2012",
  "aminim2010",
  "aminim2010b",
  "suns2010",
  "zhangx2010b",
  "suns2008",
  "matsubara2005",
  "dasigiv2001",
  "graffm2023",
  "tianl2023",
  "karisanip2022",
  "lij2020",
  "zhang2021",
  "carmona2020",
  "rajendran2016",
  "liaox2015",
  "gup2009",
  "akhtiamov2019",
  "yangp2014",
  "hey2019",
  "xuc2017",
  "iglesias2016"
)
# Bibtex
# Every citation key covered by the review (multimodal and multiview
# combined), used for completeness checks against the .bib file.
ALL_BIBTEXKEY <- c(
  "dacosta2022",
  "gallo2021",
  "gui2021",
  "huc2021",
  "garg2021",
  "ma2021",
  "zingaro2021",
  "zhang2021",
  "wang2021",
  "liang2021",
  "guelorget2021",
  "sus2021",
  "zhao2023",
  "lij2020",
  "jiax2021",
  "maf2020",
  "max2020",
  "setiawan2021",
  "braz2020",
  "debreuij2020",
  "hessel2020",
  "doinychko2020",
  "carmona2020",
  "zhu2020",
  "bhatt2019",
  "hey2019",
  "wangh2020",
  "jainr2019",
  "ravikiranm2019",
  "mmironczuk2019",
  "chens2019",
  "wangh2019",
  "hoylea2019",
  "akhtiamov2019",
  "anget2018",
  "mmironczuk2020",
  "zhup2018",
  "matricm2018",
  "tellez2018",
  "akhiamov2018",
  "guptad2018",
  "ferreira2018",
  "argon2018",
  "pengj2018",
  "xuc2017",
  "schmittm2017",
  "zhanz2017",
  "akhtiamovo2017",
  "huz2017",
  "xuh2016",
  "sinorar2016",
  "xux2016",
  "rajendran2016",
  "iglesias2016",
  "fakri2015",
  "brefeldu2015",
  "liaox2015",
  "liparas2014",
  "liuj2014",
  "cristanim2014",
  "zhangd2013",
  "longg2013",
  "perinaa2013",
  "liy2013",
  "zhangb2013",
  "lig2012",
  "yangp2012",
  "kovesim2012",
  "guyo2012",
  "zhengw2011",
  "zhangx2010",
  "perezgraciat2010",
  "aminim2010b",
  "suns2010",
  "zhangx2010b",
  "aminim2010",
  "chens2009",
  "aminim2009",
  "chenb2009",
  "zhangx2009",
  "gup2009",
  "suns2008",
  "zhangb2008",
  "matsubara2005",
  "dasigiv2001",
  "ronghaop2024",
  "tengfeil2024",
  "liub2024",
  "zhangy2024",
  "xianfang2024",
  "zhangqi2024",
  "yushili2024",
  "ghorbanali2024",
  "jiz2024",
  "xuy2024",
  "fengz2024",
  "wajdm2024",
  "bakkalis2023",
  "zouh2023",
  "chenz2023",
  "luzdearau2023",
  "jarquin2023",
  "linckere2023",
  "kenny2023",
  "tianl2023",
  "varmanp2023",
  "graffm2023",
  "kozienkop2023",
  "liangz2023",
  "jarrahia2023",
  "samya2023",
  "liut2024",
  "rasheeda2023",
  "shah2023",
  "fujinumay2023",
  "chos2023",
  "ortizperez2023",
  "chenl2022",
  "dongpin2022",
  "guq2022",
  "yuet2022",
  "karisanip2022",
  "reil2023",
  "arlqaraleshs2024",
  "sapeao2022",
  "paraskevopoulos2022",
  "sangy2022",
  "liuj2022",
  "adwaithd2022",
  "kanchid2022",
  "luox2022",
  "jiangs2024",
  "wangq2022",
  "andriyanovn2022",
  "chatziagapia2022",
  "cgoncalves2022",
  "guod2023",
  "liuw2021",
  "yangp2014"
)
# Print basic information
unique(c(MULTIMOAL_BIBTEXKEY, MULTIVIEW_BIBTEXKEY))
## [1] "ma2021" "xianfang2024" "reil2023"
## [4] "fujinumay2023" "hessel2020" "bakkalis2023"
## [7] "chenz2023" "zouh2023" "chenl2022"
## [10] "gallo2021" "anget2018" "rajendran2016"
## [13] "ghorbanali2024" "jiangs2024" "liub2024"
## [16] "liut2024" "ronghaop2024" "tengfeil2024"
## [19] "yushili2024" "zhangy2024" "arlqaraleshs2024"
## [22] "guod2023" "jarquin2023" "jarrahia2023"
## [25] "kenny2023" "liangz2023" "linckere2023"
## [28] "luzdearau2023" "ortizperez2023" "rasheeda2023"
## [31] "shah2023" "dacosta2022" "dongpin2022"
## [34] "kanchid2022" "paraskevopoulos2022" "sapeao2022"
## [37] "wangq2022" "garg2021" "guelorget2021"
## [40] "setiawan2021" "wang2021" "zingaro2021"
## [43] "braz2020" "debreuij2020" "zhu2020"
## [46] "jainr2019" "ravikiranm2019" "matricm2018"
## [49] "tellez2018" "schmittm2017" "cristanim2014"
## [52] "liparas2014" "perezgraciat2010" "zhangx2010"
## [55] "chens2009" "chos2023" "adwaithd2022"
## [58] "wajdm2024" "kozienkop2023" "carmona2020"
## [61] "argon2018" "guptad2018" "guq2022"
## [64] "chatziagapia2022" "yuet2022" "akhiamov2018"
## [67] "akhtiamovo2017" "andriyanovn2022" "brefeldu2015"
## [70] "aminim2009" "liy2013" "zhangqi2024"
## [73] "doinychko2020" "samya2023" "sangy2022"
## [76] "jiax2021" "liang2021" "sus2021"
## [79] "yangp2014" "maf2020" "max2020"
## [82] "wangh2020" "bhatt2019" "chens2019"
## [85] "hoylea2019" "wangh2019" "ferreira2018"
## [88] "zhup2018" "zhanz2017" "xux2016"
## [91] "perinaa2013" "zhangb2013" "zhangd2013"
## [94] "guyo2012" "kovesim2012" "yangp2012"
## [97] "zhengw2011" "chenb2009" "zhangx2009"
## [100] "zhangb2008" "fengz2024" "jiz2024"
## [103] "xuy2024" "varmanp2023" "zhao2023"
## [106] "cgoncalves2022" "liuj2022" "luox2022"
## [109] "gui2021" "huc2021" "liuw2021"
## [112] "mmironczuk2020" "mmironczuk2019" "pengj2018"
## [115] "huz2017" "sinorar2016" "xuh2016"
## [118] "fakri2015" "liuj2014" "longg2013"
## [121] "lig2012" "aminim2010" "aminim2010b"
## [124] "suns2010" "zhangx2010b" "suns2008"
## [127] "matsubara2005" "dasigiv2001" "graffm2023"
## [130] "tianl2023" "karisanip2022" "lij2020"
## [133] "zhang2021" "liaox2015" "gup2009"
## [136] "akhtiamov2019" "hey2019" "xuc2017"
## [139] "iglesias2016"
length(unique(c(
MULTIMOAL_BIBTEXKEY, MULTIVIEW_BIBTEXKEY
))) == length(ALL_BIBTEXKEY)
## [1] TRUE
setdiff(unique(c(
MULTIMOAL_BIBTEXKEY, MULTIVIEW_BIBTEXKEY
)), ALL_BIBTEXKEY)
## character(0)
setdiff(ALL_BIBTEXKEY, unique(c(
MULTIMOAL_BIBTEXKEY, MULTIVIEW_BIBTEXKEY
)))
## character(0)
names(table(unique(sort(excel_df$bibtexkey)))) == unique(sort(ALL_BIBTEXKEY))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE
GG_PLOTS <- c()
# Count papers in each group
multimodal_count <- length(unique(MULTIMOAL_BIBTEXKEY))
multiview_count <- length(unique(MULTIVIEW_BIBTEXKEY))
# Count papers in both categories (overlap)
papers_in_both <- length(intersect(MULTIMOAL_BIBTEXKEY, MULTIVIEW_BIBTEXKEY))
# Count papers exclusive to each group
multimodal_only <- multimodal_count - papers_in_both
multiview_only <- multiview_count - papers_in_both
# Define the short categories
categories <- c("multimodal", "multiview", "both")
# Create a data frame for the pie chart
paper_counts <- data.frame(
group = c("multimodal", "multiview", "both"),
count = c(multimodal_only, multiview_only, papers_in_both)
)
# Calculate percentages
paper_counts <- paper_counts %>%
mutate(
percentage = round(count / sum(count) * 100, 1),
label = paste0(count, " (", percentage, "%)"),
group = factor(group, levels = categories)
)
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(3))(3),
categories)
# Create pie chart with ggplot2
ggplot_chart <- create_pie_chart(
paper_counts,
title = "",
subtitle = "",
global_palette = global_palette
)
GG_PLOTS <- append(GG_PLOTS, ggplot_chart)
ggsave(
"fig-mm-mv-both-pie.pdf",
plot = ggplot_chart,
width = 14,
height = 8
)
ggplot_chart
knitr::kable(paper_counts, caption = "Paper Distribution by Category") %>% kableExtra::kable_styling()
| group | count | percentage | label |
|---|---|---|---|
| multimodal | 66 | 47.5 | 66 (47.5%) |
| multiview | 71 | 51.1 | 71 (51.1%) |
| both | 2 | 1.4 | 2 (1.4%) |
year_group_counts <- excel_df %>%
mutate(bibtexkey = tolower(bibtexkey)) %>%
inner_join(
bib_df %>% mutate(BIBTEXKEY = tolower(BIBTEXKEY)) %>% select('BIBTEXKEY', 'YEAR'),
by = c("bibtexkey" = "BIBTEXKEY")
) %>%
mutate(
in_mm = bibtexkey %in% tolower(MULTIMOAL_BIBTEXKEY),
in_mv = bibtexkey %in% tolower(MULTIVIEW_BIBTEXKEY),
group = case_when(
in_mm & !in_mv ~ "multimodal",
in_mv & !in_mm ~ "multiview",
in_mm & in_mv ~ "both",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(group)) %>%
count(YEAR, group, name = "n") %>%
mutate(group = factor(group, levels = categories))
bubble_plot <- create_bubble_plot(
year_group_counts,
title = "",
subtitle = "",
global_palette = global_palette
)
GG_PLOTS <- append(GG_PLOTS, bubble_plot)
paste("Multimodal pappers: ", sum(year_group_counts$n[year_group_counts$group == "multimodal"]))
## [1] "Multimodal pappers: 66"
paste("Multiview pappers: ", sum(year_group_counts$n[year_group_counts$group == "multiview"]))
## [1] "Multiview pappers: 71"
overlap_keys <- intersect(MULTIMOAL_BIBTEXKEY, MULTIVIEW_BIBTEXKEY)
print(overlap_keys)
## [1] "rajendran2016" "carmona2020"
bubble_plot
# 1. Wrap the title at ~60 characters per line
# Pie chart of the distribution of works on multimodal and multi-view learning (left chart) and the distribution of the number of these works by year (right figure)
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
# 2. Create a centered title grob
title_grob <- grid::textGrob(
wrapped_title,
gp = grid::gpar(fontsize = 12, fontface = "plain"),
just = "center" # center align
)
# 3. Draw: give a bit more room to the title (e.g. 1:10 height ratio)
combined_plot <- gridExtra::grid.arrange(
title_grob,
gridExtra::arrangeGrob(ggplot_chart, bubble_plot, ncol = 2),
ncol = 1,
heights = c(1, 10)
)
ggsave(
"fig-mm-mv-both-pie-bubble.pdf",
plot = combined_plot,
width = 14,
height = 8
)
multimodal_df <- excel_df %>%
filter(bibtexkey %in% MULTIMOAL_BIBTEXKEY)
head(multimodal_df)
## # A tibble: 6 x 21
## l.p doi bibtexkey `Article title` `Short note` Please summarize or ~1
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 10.1016/j~ dacosta2~ "Providing a g~ Multi-modal~ "The article \"Provid~
## 2 2 10.1007/9~ gallo2021 "Visual Word E~ The study f~ "The paper \"Visual W~
## 3 5 10.1145/3~ garg2021 "On-Device Doc~ The study u~ "The paper \"On-Devic~
## 4 6 10.18653/~ ma2021 "On the (in)ef~ The study i~ "The paper \"On the (~
## 5 7 10.1109/I~ zingaro2~ "Multimodal si~ The study f~ "The paper \"Multimod~
## 6 9 10.1109/M~ wang2021 "Implicit Emot~ The study f~ "The article titled \~
## # i abbreviated name:
## # 1: `Please summarize or provide the most important details of the work. Use a maximum of five sentences.`
## # i 15 more variables: `What are the findings?` <chr>,
## # `What are the challenges?` <chr>,
## # `Identified the datasets used in the article` <chr>,
## # `Disambiguated datasets names` <chr>,
## # `What other models were selected for comparison?` <chr>, ...
paste("Extracted", nrow(multimodal_df), "multimodal papers out of", nrow(excel_df), "total papers")
## [1] "Extracted 68 multimodal papers out of 139 total papers"
year_multimodal_df <- multimodal_df %>%
mutate(bibtexkey = tolower(bibtexkey)) %>%
inner_join(
bib_df %>% mutate(BIBTEXKEY = tolower(BIBTEXKEY)) %>% select('BIBTEXKEY', 'YEAR'),
by = c("bibtexkey" = "BIBTEXKEY")
)
general_framework <- list()
general_framework[['When to use multi-modal learning']] <- c('ma2021')
general_framework[['Feature selection as a multi-modal optimization problem']] <- c('xianfang2024')
general_framework[['Handling missing views or modalities']] <- c('reil2023')
general_framework[['Evaluating multi-modal model performance']] <- c('fujinumay2023, hessel2020')
general_framework[['Multimodal representation learning']] <- c('chenz2023, zouh2023, gallo2021, rajendran2016')
summary_df <- create_bubble_df(year_multimodal_df, general_framework)
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(5))(5),
unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "General Framework",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "General Framework") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 2 | Evaluating multi-modal model performance | 2020 | 1 |
| 6 | Evaluating multi-modal model performance | 2023 | 1 |
| 8 | Feature selection as a multi-modal optimization problem | 2024 | 1 |
| 5 | Handling missing views or modalities | 2022 | 1 |
| 1 | Multimodal representation learning | 2016 | 1 |
| 3 | Multimodal representation learning | 2021 | 1 |
| 7 | Multimodal representation learning | 2023 | 2 |
| 4 | When to use multi-modal learning | 2021 | 1 |
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "General Framework") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Evaluating multi-modal model performance | 2 | 22.2 | 2 (22.2%) | group |
| Feature selection as a multi-modal optimization problem | 1 | 11.1 | 1 (11.1%) | group |
| Handling missing views or modalities | 1 | 11.1 | 1 (11.1%) | group |
| Multimodal representation learning | 4 | 44.4 | 4 (44.4%) | group |
| When to use multi-modal learning | 1 | 11.1 | 1 (11.1%) | group |
# 1. Wrap the title at ~60 characters per line
# Treemap of the Distribution of Works on Multimodal–General Framework (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
guides(
fill = guide_legend(
ncol = 8,
byrow = TRUE,
keywidth = unit(0.6, "cm"),
keyheight = unit(0.6, "cm"),
title = "Group",
title.position = "top",
title.hjust = 0.5
)
) +
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 8),
legend.spacing.x = unit(0.15, "cm"),
legend.spacing.y = unit(0.1, "cm"),
legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
legend.box.background = element_rect(fill="transparent", colour = NA),
legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
)
g_fill <- ggplotGrob(treemap_for_fill_legend)
fill_legend_grob <- g_fill$grobs[[which(sapply(g_fill$grobs, function(x) x$name) == "guide-box")]]
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
guides(
fill = "none",
size = guide_legend(
title = "Count",
direction = "horizontal",
title.position = "top",
label.position = "bottom",
keywidth = unit(1.2, "cm"),
keyheight = unit(0.8, "cm"),
label.hjust = 0.5,
title.hjust = 0.5,
override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
)
) +
theme(
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 9),
legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
)
g_size <- ggplotGrob(bubble_plot_for_size_legend)
size_legend_grob <- g_size$grobs[[which(sapply(g_size$grobs, function(x) x$name) == "guide-box")]]
# Check if legends were found
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
fill_legend_grob,
size_legend_grob,
ncol = 2,
widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
# Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
theme(legend.position = "none",
plot.title = element_blank(),
plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
ggplot_chart_no_legend,
bubble_plot_no_legend,
ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
title_grob,
plots_row,
combined_legends_grob,
ncol = 1,
heights = unit.c(title_height_est,
unit(1, "null"),
final_legend_panel_height)
)
ggsave(
"fig-mm-gen-frem-pie-bubble.pdf",
plot = combined_plot_final,
width = 14,
height = 8
)
tasks <- list()
tasks[['Finance']] <- c('chos2023')
tasks[['Healthcare']] <- c('chenz2023, ortizperez2023, braz2020')
tasks[['Disaster response/managment']] <- c('arlqaraleshs2024, adwaithd2022, dacosta2022, debreuij2020')
tasks[['Air traffic control']] <- c('guod2023')
tasks[['Cultural heritage']] <- c('reil2023, perezgraciat2010')
tasks[['Education']] <- c('linckere2023, sapeao2022')
tasks[['E-commerce']] <- c('wajdm2024, chenl2022')
tasks[['Industrial fault diagnosis']] <- c('dongpin2022')
tasks[['Social Network']] <- c('shah2023')
tasks[['Social media analysis']] <- c('yushili2024, jarquin2023, kozienkop2023, guelorget2021, carmona2020, argon2018, guptad2018')
tasks[['Fake news detection']] <- c('jarrahia2023, liangz2023')
tasks[['Hate speech and offensive language detection']] <- c('kozienkop2023, guq2022')
tasks[['Emotion recognition']] <- c('ronghaop2024, kenny2023, wang2021, schmittm2017')
tasks[['Author profiling']] <- c('carmona2020, argon2018, matricm2018, tellez2018, cristanim2014')
tasks[['Document image classification']] <- c('jiangs2024, bakkalis2023, chos2023, fujinumay2023, luzdearau2023, rasheeda2023, kanchid2022, garg2021, zingaro2021, jainr2019')
#tasks[['Video classification']] <- c('')
tasks[['Speaker role identification']] <- c('guod2023')
tasks[['Sentiment analysis']] <- c('ghorbanali2024, liub2024, zhangy2024, setiawan2021, anget2018')
tasks[['Filled Pause Detection']] <- c('chatziagapia2022')
tasks[['Safety-Report Observations']] <- c('paraskevopoulos2022')
tasks[['Malware Text Classification']] <- c('ravikiranm2019')
tasks[['Research paper classification']] <- c('liut2024, yuet2022')
tasks[['Web document classification']] <- c('ma2021, liparas2014, zhangx2010')
summary_df <- create_bubble_df(year_multimodal_df, tasks)
base <- ggsci::pal_jco("default")(10)
extra <- c(lighten(base, 0.3), darken(base, 0.2))
palette_23 <- c(base, extra)[1:23]
global_palette <- stats::setNames(palette_23, unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "Tasks",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Tasks") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 31 | Air traffic control | 2023 | 1 |
| 3 | Author profiling | 2014 | 1 |
| 6 | Author profiling | 2018 | 3 |
| 11 | Author profiling | 2020 | 1 |
| 1 | Cultural heritage | 2010 | 1 |
| 20 | Cultural heritage | 2022 | 1 |
| 12 | Disaster response/managment | 2020 | 1 |
| 21 | Disaster response/managment | 2022 | 2 |
| 32 | Disaster response/managment | 2023 | 1 |
| 9 | Document image classification | 2019 | 1 |
| 15 | Document image classification | 2021 | 2 |
| 22 | Document image classification | 2022 | 3 |
| 33 | Document image classification | 2023 | 3 |
| 44 | Document image classification | 2024 | 1 |
| 23 | E-commerce | 2022 | 1 |
| 34 | E-commerce | 2023 | 1 |
| 24 | Education | 2022 | 1 |
| 35 | Education | 2023 | 1 |
| 5 | Emotion recognition | 2017 | 1 |
| 16 | Emotion recognition | 2021 | 1 |
| 36 | Emotion recognition | 2023 | 1 |
| 45 | Emotion recognition | 2024 | 1 |
| 25 | Fake news detection | 2022 | 1 |
| 37 | Fake news detection | 2023 | 1 |
| 26 | Filled Pause Detection | 2022 | 1 |
| 38 | Finance | 2023 | 1 |
| 27 | Hate speech and offensive language detection | 2022 | 1 |
| 39 | Hate speech and offensive language detection | 2023 | 1 |
| 13 | Healthcare | 2020 | 1 |
| 40 | Healthcare | 2023 | 2 |
| 28 | Industrial fault diagnosis | 2022 | 1 |
| 10 | Malware Text Classification | 2019 | 1 |
| 29 | Research paper classification | 2022 | 1 |
| 46 | Research paper classification | 2024 | 1 |
| 30 | Safety-Report Observations | 2022 | 1 |
| 7 | Sentiment analysis | 2018 | 1 |
| 17 | Sentiment analysis | 2021 | 1 |
| 47 | Sentiment analysis | 2024 | 3 |
| 8 | Social media analysis | 2018 | 2 |
| 14 | Social media analysis | 2020 | 1 |
| 18 | Social media analysis | 2021 | 1 |
| 41 | Social media analysis | 2023 | 2 |
| 48 | Social media analysis | 2024 | 1 |
| 42 | Social Network | 2023 | 1 |
| 43 | Speaker role identification | 2023 | 1 |
| 2 | Web document classification | 2010 | 1 |
| 4 | Web document classification | 2014 | 1 |
| 19 | Web document classification | 2021 | 1 |
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Tasks") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Air traffic control | 1 | 1.6 | 1 (1.6%) | group |
| Author profiling | 5 | 8.2 | 5 (8.2%) | group |
| Cultural heritage | 2 | 3.3 | 2 (3.3%) | group |
| Disaster response/managment | 4 | 6.6 | 4 (6.6%) | group |
| Document image classification | 10 | 16.4 | 10 (16.4%) | group |
| E-commerce | 2 | 3.3 | 2 (3.3%) | group |
| Education | 2 | 3.3 | 2 (3.3%) | group |
| Emotion recognition | 4 | 6.6 | 4 (6.6%) | group |
| Fake news detection | 2 | 3.3 | 2 (3.3%) | group |
| Filled Pause Detection | 1 | 1.6 | 1 (1.6%) | group |
| Finance | 1 | 1.6 | 1 (1.6%) | group |
| Hate speech and offensive language detection | 2 | 3.3 | 2 (3.3%) | group |
| Healthcare | 3 | 4.9 | 3 (4.9%) | group |
| Industrial fault diagnosis | 1 | 1.6 | 1 (1.6%) | group |
| Malware Text Classification | 1 | 1.6 | 1 (1.6%) | group |
| Research paper classification | 2 | 3.3 | 2 (3.3%) | group |
| Safety-Report Observations | 1 | 1.6 | 1 (1.6%) | group |
| Sentiment analysis | 5 | 8.2 | 5 (8.2%) | group |
| Social Network | 1 | 1.6 | 1 (1.6%) | group |
| Social media analysis | 7 | 11.5 | 7 (11.5%) | group |
| Speaker role identification | 1 | 1.6 | 1 (1.6%) | group |
| Web document classification | 3 | 4.9 | 3 (4.9%) | group |
# 1. Wrap the title at ~60 characters per line
# Treemap of the Distribution of Works on Multimodal–Task (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
guides(
fill = guide_legend(
ncol = 8,
byrow = TRUE,
keywidth = unit(0.6, "cm"),
keyheight = unit(0.6, "cm"),
title = "Group",
title.position = "top",
title.hjust = 0.5
)
) +
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 8),
legend.spacing.x = unit(0.15, "cm"),
legend.spacing.y = unit(0.1, "cm"),
legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
legend.box.background = element_rect(fill="transparent", colour = NA),
legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
)
g_fill <- ggplotGrob(treemap_for_fill_legend)
fill_legend_grob <- g_fill$grobs[[which(sapply(g_fill$grobs, function(x) x$name) == "guide-box")]]
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
guides(
fill = "none",
size = guide_legend(
title = "Count",
direction = "horizontal",
title.position = "top",
label.position = "bottom",
keywidth = unit(1.2, "cm"),
keyheight = unit(0.8, "cm"),
label.hjust = 0.5,
title.hjust = 0.5,
override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
)
) +
theme(
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 9),
legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
)
g_size <- ggplotGrob(bubble_plot_for_size_legend)
size_legend_grob <- g_size$grobs[[which(sapply(g_size$grobs, function(x) x$name) == "guide-box")]]
# Check if legends were found
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
fill_legend_grob,
size_legend_grob,
ncol = 2,
widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
# Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
theme(legend.position = "none",
plot.title = element_blank(),
plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
ggplot_chart_no_legend,
bubble_plot_no_legend,
ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
title_grob,
plots_row,
combined_legends_grob,
ncol = 1,
heights = unit.c(title_height_est,
unit(1, "null"),
final_legend_panel_height)
)
ggsave(
"fig-mm-task-pie-bubble.pdf",
plot = combined_plot_final,
width = 14,
height = 8
)
data_modalities <- list()
data_modalities[["Text + Image"]] <- c('ghorbanali2024, liub2024, liut2024, tengfeil2024, wajdm2024, yushili2024, zhangy2024, arlqaraleshs2024, bakkalis2023, chenz2023, chos2023, jarquin2023, liangz2023, linckere2023, luzdearau2023, rasheeda2023, zouh2023, adwaithd2022, andriyanovn2022, chenl2022, kanchid2022, paraskevopoulos2022, wangq2022, yuet2022, gallo2021, garg2021, guelorget2021, ma2021, setiawan2021, wang2021, zingaro2021, braz2020, carmona2020, hessel2020, jainr2019, ravikiranm2019, argon2018, guptad2018, matricm2018, tellez2018, rajendran2016, cristanim2014, liparas2014, zhangx2010, chens2009')
data_modalities[["Text + Audio"]] <- c('ronghaop2024, guod2023, ortizperez2023, chatziagapia2022, sapeao2022, zhu2020, akhtiamovo2017')
data_modalities[["Symbolic Music + Metadata"]] <- c('perezgraciat2010')
data_modalities[["Text + Metadata"]] <- c('jarrahia2023', 'shah2023, kozienkop2023, dacosta2022')
data_modalities[["Text + Image + Audio"]] <- c('akhiamov2018, schmittm2017')
data_modalities[["Text + Motion Capture + Audio"]] <- c('kenny2023')
data_modalities[["Text + Time series"]] <- c('dongpin2022, debreuij2020, anget2018')
data_modalities[["Text + Image + Metadata"]] <- c('jiangs2024, fujinumay2023, reil2023, guq2022')
summary_df <- create_bubble_df(year_multimodal_df, data_modalities)
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(8))(8),
unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "Data Modalities (combinations of text, image, audio)",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Data Modalities (combinations of text, image, audio)") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 2 | Symbolic Music + Metadata | 2010 | 1 |
| 6 | Text + Audio | 2017 | 1 |
| 12 | Text + Audio | 2020 | 1 |
| 16 | Text + Audio | 2022 | 2 |
| 21 | Text + Audio | 2023 | 2 |
| 26 | Text + Audio | 2024 | 1 |
| 1 | Text + Image | 2009 | 1 |
| 3 | Text + Image | 2010 | 1 |
| 4 | Text + Image | 2014 | 2 |
| 5 | Text + Image | 2016 | 1 |
| 8 | Text + Image | 2018 | 4 |
| 11 | Text + Image | 2019 | 2 |
| 13 | Text + Image | 2020 | 3 |
| 15 | Text + Image | 2021 | 7 |
| 17 | Text + Image | 2022 | 9 |
| 22 | Text + Image | 2023 | 9 |
| 27 | Text + Image | 2024 | 6 |
| 7 | Text + Image + Audio | 2017 | 1 |
| 9 | Text + Image + Audio | 2018 | 1 |
| 18 | Text + Image + Metadata | 2022 | 2 |
| 23 | Text + Image + Metadata | 2023 | 1 |
| 28 | Text + Image + Metadata | 2024 | 1 |
| 19 | Text + Metadata | 2022 | 2 |
| 24 | Text + Metadata | 2023 | 2 |
| 25 | Text + Motion Capture + Audio | 2023 | 1 |
| 10 | Text + Time series | 2018 | 1 |
| 14 | Text + Time series | 2020 | 1 |
| 20 | Text + Time series | 2022 | 1 |
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Data Modalities (combinations of text, image, audio)") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Symbolic Music + Metadata | 1 | 1.5 | 1 (1.5%) | group |
| Text + Audio | 7 | 10.4 | 7 (10.4%) | group |
| Text + Image | 45 | 67.2 | 45 (67.2%) | group |
| Text + Image + Audio | 2 | 3.0 | 2 (3%) | group |
| Text + Image + Metadata | 4 | 6.0 | 4 (6%) | group |
| Text + Metadata | 4 | 6.0 | 4 (6%) | group |
| Text + Motion Capture + Audio | 1 | 1.5 | 1 (1.5%) | group |
| Text + Time series | 3 | 4.5 | 3 (4.5%) | group |
# 1. Wrap the title
# Treemap of the Distribution of Works on Multimodal–Data Modalities (combinations of text, image, audio) (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
"",
width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
guides(
fill = guide_legend(
ncol = 8,
byrow = TRUE,
keywidth = unit(0.6, "cm"),
keyheight = unit(0.6, "cm"),
title = "Group",
title.position = "top",
title.hjust = 0.5
)
) +
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 8),
legend.spacing.x = unit(0.15, "cm"),
legend.spacing.y = unit(0.1, "cm"),
legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
legend.box.background = element_rect(fill="transparent", colour = NA),
legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
)
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the "guide-box" grob defensively: `grobs[[which(...)]]` throws an
# unhelpful "subscript out of bounds" error when no legend is present, which
# made the is.null() sanity checks below unreachable dead code. vapply() is
# used instead of sapply() so the grob-name lookup is always a character vector.
fill_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
fill_legend_grob <- if (length(fill_idx) > 0) g_fill$grobs[[fill_idx[1]]] else NULL
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill = "transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Same defensive guide-box lookup for the size legend.
size_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
size_legend_grob <- if (length(size_idx) > 0) g_size$grobs[[size_idx[1]]] else NULL
# Fail fast with a readable message if either legend could not be extracted.
if (is.null(fill_legend_grob)) stop("Fill legend not found!")
if (is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY.
# The fill legend gets more space because it carries the multi-row group keys.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, size legend 35%; adjust as needed
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot: title / plots / legends stacked vertically.
# The middle row absorbs all remaining height (the "null" unit); the outer
# rows keep their measured natural sizes plus a little padding.
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Height of the combined legend panel is driven by the (multi-row) fill legend.
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mm-data-mod-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Citation keys of the surveyed works, grouped by type of classification task.
classification_tasks <- list(
  "Binary Classification" = c('guod2023, jarquin2023, jarrahia2023, kozienkop2023, liangz2023, ortizperez2023, shah2023, adwaithd2022, chatziagapia2022, dacosta2022, guq2022, braz2020, debreuij2020, ravikiranm2019, akhiamov2018, argon2018, guptad2018, matricm2018, tellez2018, akhtiamovo2017'),
  "Multi-class Classification" = c('ghorbanali2024, jiangs2024, liub2024, liut2024, ronghaop2024, tengfeil2024, wajdm2024, yushili2024, zhangy2024, bakkalis2023, chenz2023, chos2023, jarquin2023, kenny2023, linckere2023, luzdearau2023, reil2023, zouh2023, adwaithd2022, andriyanovn2022, chenl2022, dongpin2022, kanchid2022, paraskevopoulos2022, sapeao2022, wangq2022, yuet2022, gallo2021, garg2021, setiawan2021, wang2021, zingaro2021, carmona2020, hessel2020, zhu2020, jainr2019, anget2018, schmittm2017, rajendran2016, cristanim2014, liparas2014, perezgraciat2010, zhangx2010, chens2009'),
  "Multi-label Classification" = c('fujinumay2023, guq2022, guelorget2021, ma2021')
)
# Per-year bubble summary and a shared colour palette keyed by group name.
summary_df <- create_bubble_df(year_multimodal_df, classification_tasks)
jco_base_colors <- ggsci::pal_jco("default")(6)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(jco_base_colors)(6),
  unique(summary_df$group)
)
bubble_plot <- create_bubble_plot(
  summary_df,
  title = "Type of Classification Task",
  subtitle = "Distribution of papers by category and publication year",
  global_palette = global_palette
)
bubble_plot
knitr::kable(summary_df, caption = "Type of Classification Task") %>% kableExtra::kable_styling()
| | group | YEAR | n |
|---|---|---|---|
| 5 | Binary Classification | 2017 | 1 |
| 7 | Binary Classification | 2018 | 5 |
| 9 | Binary Classification | 2019 | 1 |
| 11 | Binary Classification | 2020 | 2 |
| 15 | Binary Classification | 2022 | 5 |
| 18 | Binary Classification | 2023 | 6 |
| 1 | Multi-class Classification | 2009 | 1 |
| 2 | Multi-class Classification | 2010 | 2 |
| 3 | Multi-class Classification | 2014 | 2 |
| 4 | Multi-class Classification | 2016 | 1 |
| 6 | Multi-class Classification | 2017 | 1 |
| 8 | Multi-class Classification | 2018 | 1 |
| 10 | Multi-class Classification | 2019 | 1 |
| 12 | Multi-class Classification | 2020 | 3 |
| 13 | Multi-class Classification | 2021 | 5 |
| 16 | Multi-class Classification | 2022 | 11 |
| 19 | Multi-class Classification | 2023 | 8 |
| 21 | Multi-class Classification | 2024 | 8 |
| 14 | Multi-label Classification | 2021 | 2 |
| 17 | Multi-label Classification | 2022 | 1 |
| 20 | Multi-label Classification | 2023 | 1 |
# Expand each (group, YEAR, n) summary row into n individual rows, then
# re-summarise by group for the treemap panel.
row_indices <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[row_indices, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Type of Classification Task") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Binary Classification | 20 | 29.4 | 20 (29.4%) | group |
| Multi-class Classification | 44 | 64.7 | 44 (64.7%) | group |
| Multi-label Classification | 4 | 5.9 | 4 (5.9%) | group |
# 1. Wrap the (currently empty) figure title.
# Intended caption: Treemap of the Distribution of Works on Type of
# Classification Task (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap): re-theme a copy of the treemap so
# its legend is laid out as the combined figure needs it.
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill = "transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill = "transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the "guide-box" grob defensively: `grobs[[which(...)]]` throws an
# unhelpful "subscript out of bounds" error when no legend is present, which
# made the is.null() sanity checks below unreachable dead code. vapply() is
# used instead of sapply() so the grob-name lookup is always a character vector.
fill_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
fill_legend_grob <- if (length(fill_idx) > 0) g_fill$grobs[[fill_idx[1]]] else NULL
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill = "transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Same defensive guide-box lookup for the size legend.
size_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
size_legend_grob <- if (length(size_idx) > 0) g_size$grobs[[size_idx[1]]] else NULL
# Fail fast with a readable message if either legend could not be extracted.
if (is.null(fill_legend_grob)) stop("Fill legend not found!")
if (is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY.
# The fill legend gets more space because it carries the multi-row group keys.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, size legend 35%; adjust as needed
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot: title / plots / legends stacked vertically.
# The middle row absorbs all remaining height (the "null" unit); the outer
# rows keep their measured natural sizes plus a little padding.
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Height of the combined legend panel is driven by the (multi-row) fill legend.
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mm-class-task-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Citation keys of the surveyed works, grouped by learning paradigm.
learning_paradigms <- list(
  "Supervised Learning" = c('ghorbanali2024, jiangs2024, liub2024, liut2024, tengfeil2024, wajdm2024, yushili2024, chenz2023, guod2023, jarquin2023, jarrahia2023, kenny2023, kozienkop2023, liangz2023, linckere2023, luzdearau2023, ortizperez2023, rasheeda2023, shah2023, adwaithd2022, andriyanovn2022, chatziagapia2022, dacosta2022, dongpin2022, guq2022, sapeao2022, yuet2022, gallo2021, ma2021, setiawan2021, wang2021, garg2021, braz2020, carmona2020, debreuij2020, hessel2020, jainr2019, ravikiranm2019, akhiamov2018, argon2018, matricm2018, tellez2018, akhtiamovo2017, schmittm2017, cristanim2014, liparas2014, perezgraciat2010, zhangx2010, chens2009'),
  "Semi-supervised Learning" = c('chenl2022, anget2018, guptad2018'),
  "Active Learning" = c('guelorget2021'),
  "Transfer Learning" = c('ghorbanali2024, ronghaop2024, chenz2023, chos2023, fujinumay2023, chatziagapia2022, kanchid2022, wangq2022, zingaro2021, rajendran2016'),
  "Contrastive Learning" = c('bakkalis2023, zouh2023, chenl2022, paraskevopoulos2022'),
  "Multi-task Learning" = c('reil2023')
)
# Per-year bubble summary and a shared colour palette keyed by group name.
summary_df <- create_bubble_df(year_multimodal_df, learning_paradigms)
jco_base_colors <- ggsci::pal_jco("default")(6)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(jco_base_colors)(6),
  unique(summary_df$group)
)
bubble_plot <- create_bubble_plot(
  summary_df,
  title = "Learning Paradigms",
  subtitle = "Distribution of papers by category and publication year",
  global_palette = global_palette
)
bubble_plot
knitr::kable(summary_df, caption = "Learning Paradigms") %>% kableExtra::kable_styling()
| | group | YEAR | n |
|---|---|---|---|
| 10 | Active Learning | 2021 | 1 |
| 13 | Contrastive Learning | 2022 | 2 |
| 18 | Contrastive Learning | 2023 | 2 |
| 14 | Multi-task Learning | 2022 | 1 |
| 6 | Semi-supervised Learning | 2018 | 2 |
| 15 | Semi-supervised Learning | 2022 | 1 |
| 1 | Supervised Learning | 2009 | 1 |
| 2 | Supervised Learning | 2010 | 2 |
| 3 | Supervised Learning | 2014 | 2 |
| 5 | Supervised Learning | 2017 | 2 |
| 7 | Supervised Learning | 2018 | 4 |
| 8 | Supervised Learning | 2019 | 2 |
| 9 | Supervised Learning | 2020 | 4 |
| 11 | Supervised Learning | 2021 | 5 |
| 16 | Supervised Learning | 2022 | 11 |
| 19 | Supervised Learning | 2023 | 10 |
| 21 | Supervised Learning | 2024 | 6 |
| 4 | Transfer Learning | 2016 | 1 |
| 12 | Transfer Learning | 2021 | 1 |
| 17 | Transfer Learning | 2022 | 3 |
| 20 | Transfer Learning | 2023 | 3 |
| 22 | Transfer Learning | 2024 | 2 |
# Expand each (group, YEAR, n) summary row into n individual rows, then
# re-summarise by group for the treemap panel.
row_indices <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[row_indices, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Learning Paradigms") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Active Learning | 1 | 1.5 | 1 (1.5%) | group |
| Contrastive Learning | 4 | 5.9 | 4 (5.9%) | group |
| Multi-task Learning | 1 | 1.5 | 1 (1.5%) | group |
| Semi-supervised Learning | 3 | 4.4 | 3 (4.4%) | group |
| Supervised Learning | 49 | 72.1 | 49 (72.1%) | group |
| Transfer Learning | 10 | 14.7 | 10 (14.7%) | group |
# 1. Wrap the (currently empty) figure title.
# Intended caption: Treemap of the Distribution of Works on Learning
# Paradigms (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap): re-theme a copy of the treemap so
# its legend is laid out as the combined figure needs it.
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill = "transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill = "transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the "guide-box" grob defensively: `grobs[[which(...)]]` throws an
# unhelpful "subscript out of bounds" error when no legend is present, which
# made the is.null() sanity checks below unreachable dead code. vapply() is
# used instead of sapply() so the grob-name lookup is always a character vector.
fill_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
fill_legend_grob <- if (length(fill_idx) > 0) g_fill$grobs[[fill_idx[1]]] else NULL
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill = "transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Same defensive guide-box lookup for the size legend.
size_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
size_legend_grob <- if (length(size_idx) > 0) g_size$grobs[[size_idx[1]]] else NULL
# Fail fast with a readable message if either legend could not be extracted.
if (is.null(fill_legend_grob)) stop("Fill legend not found!")
if (is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY.
# The fill legend gets more space because it carries the multi-row group keys.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, size legend 35%; adjust as needed
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot: title / plots / legends stacked vertically.
# The middle row absorbs all remaining height (the "null" unit); the outer
# rows keep their measured natural sizes plus a little padding.
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Height of the combined legend panel is driven by the (multi-row) fill legend.
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mm-learn-para-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Citation keys of the surveyed works, grouped by fusion strategy.
fusion_strategies <- list(
  "Early Fusion" = c('arlqaraleshs2024, jiangs2024, liut2024, ronghaop2024, tengfeil2024, wajdm2024, zhangy2024, bakkalis2023, chos2023, guod2023, jarrahia2023, kenny2023, kozienkop2023, liangz2023, luzdearau2023, ortizperez2023, rasheeda2023, shah2023, adwaithd2022, dacosta2022, kanchid2022, paraskevopoulos2022, sapeao2022, wangq2022, yuet2022, gallo2021, guelorget2021, ma2021, zingaro2021, braz2020, carmona2020, debreuij2020, zhu2020, jainr2019, ravikiranm2019, anget2018, argon2018, guptad2018, schmittm2017, cristanim2014, perezgraciat2010'),
  "Late Fusion" = c('arlqaraleshs2024, liub2024, yushili2024, reil2023, andriyanovn2022, chatziagapia2022, dacosta2022, dongpin2022, guq2022, wang2021, garg2021, setiawan2021, ravikiranm2019, matricm2018, tellez2018, liparas2014, zhangx2010, chens2009'),
  "Hybrid Fusion" = c('ghorbanali2024, chenz2023, fujinumay2023, linckere2023, zouh2023, dacosta2022, ravikiranm2019, akhiamov2018, akhtiamovo2017, rajendran2016')
)
# Per-year bubble summary and a shared colour palette keyed by group name.
summary_df <- create_bubble_df(year_multimodal_df, fusion_strategies)
jco_base_colors <- ggsci::pal_jco("default")(6)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(jco_base_colors)(6),
  unique(summary_df$group)
)
bubble_plot <- create_bubble_plot(
  summary_df,
  title = "Fusion Strategies",
  subtitle = "Distribution of papers by category and publication year",
  global_palette = global_palette
)
bubble_plot
knitr::kable(summary_df, caption = "Fusion Strategies") %>% kableExtra::kable_styling()
| | group | YEAR | n |
|---|---|---|---|
| 2 | Early Fusion | 2010 | 1 |
| 4 | Early Fusion | 2014 | 1 |
| 7 | Early Fusion | 2017 | 1 |
| 9 | Early Fusion | 2018 | 3 |
| 12 | Early Fusion | 2019 | 2 |
| 15 | Early Fusion | 2020 | 4 |
| 16 | Early Fusion | 2021 | 4 |
| 18 | Early Fusion | 2022 | 10 |
| 21 | Early Fusion | 2023 | 10 |
| 24 | Early Fusion | 2024 | 5 |
| 6 | Hybrid Fusion | 2016 | 1 |
| 8 | Hybrid Fusion | 2017 | 1 |
| 10 | Hybrid Fusion | 2018 | 1 |
| 13 | Hybrid Fusion | 2019 | 1 |
| 19 | Hybrid Fusion | 2022 | 1 |
| 22 | Hybrid Fusion | 2023 | 4 |
| 25 | Hybrid Fusion | 2024 | 1 |
| 1 | Late Fusion | 2009 | 1 |
| 3 | Late Fusion | 2010 | 1 |
| 5 | Late Fusion | 2014 | 1 |
| 11 | Late Fusion | 2018 | 2 |
| 14 | Late Fusion | 2019 | 1 |
| 17 | Late Fusion | 2021 | 3 |
| 20 | Late Fusion | 2022 | 6 |
| 23 | Late Fusion | 2023 | 1 |
| 26 | Late Fusion | 2024 | 2 |
# Expand each (group, YEAR, n) summary row into n individual rows, then
# re-summarise by group for the treemap panel.
row_indices <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[row_indices, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Fusion Strategies") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Early Fusion | 41 | 59.4 | 41 (59.4%) | group |
| Hybrid Fusion | 10 | 14.5 | 10 (14.5%) | group |
| Late Fusion | 18 | 26.1 | 18 (26.1%) | group |
# 1. Wrap the (currently empty) figure title.
# Intended caption: Treemap of the Distribution of Works on Fusion
# Strategies (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap): re-theme a copy of the treemap so
# its legend is laid out as the combined figure needs it.
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill = "transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill = "transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the "guide-box" grob defensively: `grobs[[which(...)]]` throws an
# unhelpful "subscript out of bounds" error when no legend is present, which
# made the is.null() sanity checks below unreachable dead code. vapply() is
# used instead of sapply() so the grob-name lookup is always a character vector.
fill_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
fill_legend_grob <- if (length(fill_idx) > 0) g_fill$grobs[[fill_idx[1]]] else NULL
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill = "transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Same defensive guide-box lookup for the size legend.
size_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
size_legend_grob <- if (length(size_idx) > 0) g_size$grobs[[size_idx[1]]] else NULL
# Fail fast with a readable message if either legend could not be extracted.
if (is.null(fill_legend_grob)) stop("Fill legend not found!")
if (is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY.
# The fill legend gets more space because it carries the multi-row group keys.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, size legend 35%; adjust as needed
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot: title / plots / legends stacked vertically.
# The middle row absorbs all remaining height (the "null" unit); the outer
# rows keep their measured natural sizes plus a little padding.
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Height of the combined legend panel is driven by the (multi-row) fill legend.
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mm-fusion-start-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Citation keys of the surveyed works, grouped by fusion technique.
fusion_techniques <- list(
  "Machine Learning based Fusion" = c('reil2023, dacosta2022, wangq2022, setiawan2021, wang2021, carmona2020, akhiamov2018, argon2018, tellez2018, akhtiamovo2017, liparas2014, zhangx2010, chens2009'),
  "Probabilistic Fusion" = c('ghorbanali2024, dongpin2022, cristanim2014, perezgraciat2010'),
  "Neural Network Fusion" = c('jiangs2024, liub2024, liut2024, ronghaop2024, tengfeil2024, wajdm2024, yushili2024, chos2023, fujinumay2023, jarquin2023, jarrahia2023, kenny2023, kozienkop2023, linckere2023, luzdearau2023, ortizperez2023, shah2023, zouh2023, adwaithd2022, chatziagapia2022, kanchid2022, paraskevopoulos2022, sapeao2022, yuet2022, gallo2021, garg2021, guelorget2021, ma2021, zingaro2021, braz2020, debreuij2020, jainr2019, ravikiranm2019, anget2018, guptad2018, rajendran2016'),
  "Attention Fusion" = c('bakkalis2023, chenz2023, guod2023, liangz2023, rasheeda2023, ronghaop2024, zhu2020')
)
# Per-year bubble summary and a shared colour palette keyed by group name.
summary_df <- create_bubble_df(year_multimodal_df, fusion_techniques)
jco_base_colors <- ggsci::pal_jco("default")(6)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(jco_base_colors)(6),
  unique(summary_df$group)
)
bubble_plot <- create_bubble_plot(
  summary_df,
  title = "Fusion Techniques",
  subtitle = "Distribution of papers by category and publication year",
  global_palette = global_palette
)
bubble_plot
knitr::kable(summary_df, caption = "Fusion Techniques") %>% kableExtra::kable_styling()
| | group | YEAR | n |
|---|---|---|---|
| 11 | Attention Fusion | 2020 | 1 |
| 16 | Attention Fusion | 2022 | 1 |
| 20 | Attention Fusion | 2023 | 4 |
| 22 | Attention Fusion | 2024 | 1 |
| 1 | Machine Learning based Fusion | 2009 | 1 |
| 2 | Machine Learning based Fusion | 2010 | 1 |
| 4 | Machine Learning based Fusion | 2014 | 1 |
| 7 | Machine Learning based Fusion | 2017 | 1 |
| 8 | Machine Learning based Fusion | 2018 | 3 |
| 12 | Machine Learning based Fusion | 2020 | 1 |
| 14 | Machine Learning based Fusion | 2021 | 2 |
| 17 | Machine Learning based Fusion | 2022 | 3 |
| 6 | Neural Network Fusion | 2016 | 1 |
| 9 | Neural Network Fusion | 2018 | 2 |
| 10 | Neural Network Fusion | 2019 | 2 |
| 13 | Neural Network Fusion | 2020 | 2 |
| 15 | Neural Network Fusion | 2021 | 5 |
| 18 | Neural Network Fusion | 2022 | 8 |
| 21 | Neural Network Fusion | 2023 | 10 |
| 23 | Neural Network Fusion | 2024 | 6 |
| 3 | Probabilistic Fusion | 2010 | 1 |
| 5 | Probabilistic Fusion | 2014 | 1 |
| 19 | Probabilistic Fusion | 2022 | 1 |
| 24 | Probabilistic Fusion | 2024 | 1 |
# Expand each (group, YEAR, n) summary row into n individual rows, then
# re-summarise by group for the treemap panel.
row_indices <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[row_indices, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Fusion Techniques") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Attention Fusion | 7 | 11.7 | 7 (11.7%) | group |
| Machine Learning based Fusion | 13 | 21.7 | 13 (21.7%) | group |
| Neural Network Fusion | 36 | 60.0 | 36 (60%) | group |
| Probabilistic Fusion | 4 | 6.7 | 4 (6.7%) | group |
# 1. Wrap the (currently empty) figure title.
# Intended caption: Treemap of the Distribution of Works on Fusion
# Techniques (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap): re-theme a copy of the treemap so
# its legend is laid out as the combined figure needs it.
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill = "transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill = "transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the "guide-box" grob defensively: `grobs[[which(...)]]` throws an
# unhelpful "subscript out of bounds" error when no legend is present, which
# made the is.null() sanity checks below unreachable dead code. vapply() is
# used instead of sapply() so the grob-name lookup is always a character vector.
fill_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
fill_legend_grob <- if (length(fill_idx) > 0) g_fill$grobs[[fill_idx[1]]] else NULL
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill = "transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Same defensive guide-box lookup for the size legend.
size_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
size_legend_grob <- if (length(size_idx) > 0) g_size$grobs[[size_idx[1]]] else NULL
# Fail fast with a readable message if either legend could not be extracted.
if (is.null(fill_legend_grob)) stop("Fill legend not found!")
if (is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY.
# The fill legend gets more space because it carries the multi-row group keys.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, size legend 35%; adjust as needed
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot: title / plots / legends stacked vertically.
# The middle row absorbs all remaining height (the "null" unit); the outer
# rows keep their measured natural sizes plus a little padding.
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Height of the combined legend panel is driven by the (multi-row) fill legend.
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mm-fusion-tech-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Citation keys of the surveyed works, grouped by dataset category.
datasets <- list(
  "Public Benchmark Datasets" = c('ghorbanali2024, liub2024, liut2024, tengfeil2024, wajdm2024, yushili2024, bakkalis2023, kenny2023, kozienkop2023, liangz2023, zouh2023, chenl2022, kanchid2022, wangq2022, gallo2021, zingaro2021, jainr2019, argon2018, matricm2018, tellez2018, rajendran2016, chens2009'),
  "Domain-specific Datasets" = c('jiangs2024, ronghaop2024, zhangy2024, arlqaraleshs2024, chenz2023, chos2023, fujinumay2023, guod2023, jarquin2023, jarrahia2023, linckere2023, luzdearau2023, ortizperez2023, rasheeda2023, reil2023, shah2023, adwaithd2022, andriyanovn2022, chatziagapia2022, chenl2022, dacosta2022, dongpin2022, guq2022, paraskevopoulos2022, sapeao2022, yuet2022, garg2021, guelorget2021, setiawan2021, wang2021, braz2020, carmona2020, debreuij2020, zhu2020, ravikiranm2019, akhiamov2018, anget2018, guptad2018, akhtiamovo2017, schmittm2017, cristanim2014, liparas2014, perezgraciat2010, zhangx2010'),
  "Multilingual Datasets" = c('chos2023, fujinumay2023, liangz2023, ma2021, argon2018, matricm2018')
)
# Per-year bubble summary and a shared colour palette keyed by group name.
summary_df <- create_bubble_df(year_multimodal_df, datasets)
jco_base_colors <- ggsci::pal_jco("default")(6)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(jco_base_colors)(6),
  unique(summary_df$group)
)
bubble_plot <- create_bubble_plot(
  summary_df,
  title = "Datasets",
  subtitle = "Distribution of papers by category and publication year",
  global_palette = global_palette
)
bubble_plot
knitr::kable(summary_df, caption = "Datasets") %>% kableExtra::kable_styling()
| | group | YEAR | n |
|---|---|---|---|
| 2 | Domain-specific Datasets | 2010 | 2 |
| 3 | Domain-specific Datasets | 2014 | 2 |
| 5 | Domain-specific Datasets | 2017 | 2 |
| 6 | Domain-specific Datasets | 2018 | 3 |
| 9 | Domain-specific Datasets | 2019 | 1 |
| 11 | Domain-specific Datasets | 2020 | 4 |
| 12 | Domain-specific Datasets | 2021 | 4 |
| 15 | Domain-specific Datasets | 2022 | 14 |
| 17 | Domain-specific Datasets | 2023 | 9 |
| 20 | Domain-specific Datasets | 2024 | 3 |
| 7 | Multilingual Datasets | 2018 | 2 |
| 13 | Multilingual Datasets | 2021 | 1 |
| 18 | Multilingual Datasets | 2023 | 3 |
| 1 | Public Benchmark Datasets | 2009 | 1 |
| 4 | Public Benchmark Datasets | 2016 | 1 |
| 8 | Public Benchmark Datasets | 2018 | 3 |
| 10 | Public Benchmark Datasets | 2019 | 1 |
| 14 | Public Benchmark Datasets | 2021 | 2 |
| 16 | Public Benchmark Datasets | 2022 | 3 |
| 19 | Public Benchmark Datasets | 2023 | 6 |
| 21 | Public Benchmark Datasets | 2024 | 5 |
# Expand the per-(group, year) counts back to one row per paper so that
# create_summary_df() can recompute per-group totals and percentages.
repeated_rows <- rep(row.names(summary_df), times = summary_df$n)
unpacked_df <- summary_df[repeated_rows, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the per-group totals; reuses the palette built for the bubble chart.
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  global_palette = global_palette,
  title = "",
  subtitle = "",
  text_size = 11
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Datasets") %>%
  kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Domain-specific Datasets | 44 | 61.1 | 44 (61.1%) | group |
| Multilingual Datasets | 6 | 8.3 | 6 (8.3%) | group |
| Public Benchmark Datasets | 22 | 30.6 | 22 (30.6%) | group |
# 1. Wrap the (currently empty) figure title. The intended caption was:
# "Treemap of the Distribution of Works on Datasets (Left), and Annual
# Distribution of These Works (Right)".
wrapped_title <- stringr::str_wrap("", width = 60)
title_grob <- grid::textGrob(
  wrapped_title,
  gp = grid::gpar(fontsize = 16, fontface = "bold"),
  hjust = 0.5
)
# 2. Extract the FILL legend (from treemap). The chart is re-themed so its
# legend is laid out horizontally at the bottom, then the legend grob is
# pulled out of the built gtable.
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill = "transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill = "transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Fix: `grobs[[which(...)]]` raises "subscript out of bounds" when no
# "guide-box" grob exists, so the is.null() guard further down could never
# fire. Return NULL explicitly when the legend is absent.
# NOTE(review): ggplot2 >= 3.5 names these grobs "guide-box-bottom" etc.;
# match on the prefix so both old and new namings are found.
fill_idx <- grep("^guide-box", vapply(g_fill$grobs, function(g) g$name, character(1)))
fill_legend_grob <- if (length(fill_idx) > 0) g_fill$grobs[[fill_idx[1]]] else NULL
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box). The
# fill guide is suppressed so only the bubble-size guide remains.
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill = "transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill = "transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Fix: same out-of-bounds defect as the fill-legend extraction -- a missing
# "guide-box" grob must yield NULL so the sanity check below can report it.
# Prefix match also covers the "guide-box-bottom" naming of ggplot2 >= 3.5.
size_idx <- grep("^guide-box", vapply(g_size$grobs, function(g) g$name, character(1)))
size_legend_grob <- if (length(size_idx) > 0) g_size$grobs[[size_idx[1]]] else NULL
# Abort early if either legend grob could not be extracted.
if (is.null(fill_legend_grob)) {
  stop("Fill legend not found!")
}
if (is.null(size_legend_grob)) {
  stop("Size legend not found!")
}
# 4. Combine the two extracted legends side by side: the (multi-row) fill
# legend gets 65% of the horizontal space, the size legend the remaining 35%.
# Adjust these proportions as needed.
legend_widths <- unit(c(0.65, 0.35), "npc")
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = legend_widths
)
# 5. Strip legends (and the bubble plot's titles) from the panel copies;
# legends are drawn once in the shared bottom row instead.
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(
    legend.position = "none",
    plot.title = element_blank(),
    plot.subtitle = element_blank()
  )
# 6. Treemap on the left, bubble chart on the right.
plots_row <- arrangeGrob(ggplot_chart_no_legend, bubble_plot_no_legend, ncol = 2)
# 7. Stack title / plots / legends. The title and legend rows are sized from
# their grobs (plus padding); the plot row absorbs the remaining space.
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches")
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est, unit(1, "null"), final_legend_panel_height)
)
ggsave("fig-mm-datasets-pie-bubble.pdf", plot = combined_plot_final, width = 14, height = 8)
# Registry of the treemap charts built below, keyed by column name, so they
# can be re-assembled into one combined figure at the end of this section.
GG_PLOTS_REGISTER <- list()
# Rename the columns to make them easier to work with
# (the originals are the literal survey/extraction question texts).
multimodal_df_renamed <- multimodal_df %>%
rename(
datasets = 'Identified the datasets used in the article',
dis_datasets = 'Disambiguated datasets names',
models = 'What other models were selected for comparison?',
dis_models = 'Disambiguated models names',
metrics = 'Identified performance metrics used in the article',
dis_metrics = 'Disambiguated performance metrics names'
)
# Create better titles for each column
# (used as figure/table captions for the disambiguated columns below).
title_mapping <- c(
"dis_datasets" = "Disambiguated datasets names",
"dis_models" = "Disambiguated models names",
"dis_metrics" = "Disambiguated performance metrics names"
)
# One row per dataset mention: split the multi-line free-text answers,
# lower-case and trim each entry, and turn empty strings into NA.
multimodal_dis_df <- multimodal_df_renamed %>%
  select('l.p', 'dis_datasets') %>%
  separate_rows('dis_datasets', sep = "\\r?\\n") %>%
  mutate(
    dis_datasets = na_if(stringr::str_trim(stringr::str_to_lower(dis_datasets)), "")
  )
# Per-dataset mention counts, most frequent first; drop the NA group.
summary_df <- create_summary_df(multimodal_dis_df, 'dis_datasets') %>%
  filter(!is.na(group)) %>%
  arrange(desc(count))
# Lump datasets mentioned fewer than 4 times into "other", re-aggregate by
# the (possibly new) group, and recompute percentages/labels over the new
# totals, ordered by descending count.
summary_cuted_df <- summary_df %>%
  mutate(group = if_else(count < 4, "other", group)) %>%
  group_by(group, column) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(
    percentage = count / sum(count) * 100,
    label = paste0(count, " \n (", round(percentage, 1), "%)")
  ) %>%
  arrange(desc(count))
# Fix: the palette was hard-coded to 7 colours while its names come from
# however many groups survive the lumping step; any mismatch silently
# produces NA-named palette entries. Size the names to the groups instead
# (the leading colours are kept, so the figure is unchanged when the
# counts agree, as they do here).
treemap_groups <- unique(summary_cuted_df$group)
jco_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(7))(7)
global_palette <- stats::setNames(jco_colors[seq_along(treemap_groups)], treemap_groups)
# Treemap of the lumped dataset groups.
ggplot_chart <- create_treemap_chart(
  summary_df = summary_cuted_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11
)
ggsave("fig-mm-eval-datasets-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['dis_datasets']] <- ggplot_chart
ggplot_chart
# The table reports the full (un-lumped) counts.
knitr::kable(summary_df, caption = title_mapping['dis_datasets']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| food-101 | 6 | 3.1 | 6 (3.1%) | dis_datasets |
| rvl-cdip | 6 | 3.1 | 6 (3.1%) | dis_datasets |
| twitter data | 6 | 3.1 | 6 (3.1%) | dis_datasets |
| wikipedia data | 5 | 2.6 | 5 (2.6%) | dis_datasets |
| amazon reviews | 4 | 2.0 | 4 (2%) | dis_datasets |
| tobacco document images | 4 | 2.0 | 4 (2%) | dis_datasets |
| fakenewsnet | 3 | 1.5 | 3 (1.5%) | dis_datasets |
| mmaterials | 3 | 1.5 | 3 (1.5%) | dis_datasets |
| mvsa (multimodal sentiment analysis) | 3 | 1.5 | 3 (1.5%) | dis_datasets |
| yelp reviews | 3 | 1.5 | 3 (1.5%) | dis_datasets |
| ag news | 2 | 1.0 | 2 (1%) | dis_datasets |
| da-vincis challenge data | 2 | 1.0 | 2 (1%) | dis_datasets |
| iemocap | 2 | 1.0 | 2 (1%) | dis_datasets |
| maapd | 2 | 1.0 | 2 (1%) | dis_datasets |
| pan 2018 authorship profiling | 2 | 1.0 | 2 (1%) | dis_datasets |
| reddit data | 2 | 1.0 | 2 (1%) | dis_datasets |
| reuters corpora | 2 | 1.0 | 2 (1%) | dis_datasets |
| roco (radiology objects in context) | 2 | 1.0 | 2 (1%) | dis_datasets |
| smart video corpus (svc) | 2 | 1.0 | 2 (1%) | dis_datasets |
| 20 newsgroups | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| aifun | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| alector | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| arabic book cover-28 dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| arousal dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| arxiv academic papers | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| atcspeech corpus | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| avocado | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| band-in-a-box files | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| bbc news | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| bill of loading (bl) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| book cover-28-ext dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| bookcover30 dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| business registration certificate (brc) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| chexpert | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| chnsenticorp | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| cifar-10 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| cmu-moseas | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| cockamamie gobbledegook | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| common voice 6.1 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| cub-200 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| custom dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| dataset was collected from the chengdu atc center | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| dbpedia | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| deepchart | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| demcare | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| dementiabank pitt corpus | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| deprem dataset (earthquake) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| docfigure | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| emofilm | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| emotion dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| emotional data (also referred to as sentimenti) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| enron | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| fantastiques exercices | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| ferramenta | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| ferramenta dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| filled pauses dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| geonames database | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| global satellite mapping of precipitation (gsmap) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| globalphone | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| golden-chnsenticorp (g-chnsenticorp) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| gossipcop | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| greek filtered c4 (gfc4) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| ground truth database team of object and concept recognition for content-based image retrieval of university of washington (image dataset) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| hellenic national corpus (hnc) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| historical flood events data from cge-sp | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| historical rainfall data (2009–2018) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| hydrobasins dataset level 9 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| i-ctx | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| i-int | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| i-sem | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| iit-cdip | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| imdb movie reviews | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| imdb-wiki dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| in-house dataset of documents | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| indonesian social media posts dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| inter1sp | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| ipc (international patent classification) system | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| level 1 classifier dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| level 2 classifier dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| librispeech | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| local newspaper dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| luxury standard | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| malwareqrdb | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| malwaretextdb-v2 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| manulex | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| mc-30 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| medicat | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| medvqa-2019 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| men-tr-3000 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| meteorological data from inmet | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| mexican twitter corpus (mex-a3t-500) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| midi files | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| mimic-cxr | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| mscoco dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| multieurlex-doc | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| multilingual ted corpus | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| mura (musculoskeletal radiographs) dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| müsilaj dataset (sea saliva) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| n24news | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| news visual-text dataset (nvtd) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| newsplease | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| nist special database 6 (also referred to as nist-tax form or nist) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| nus-wide dataset family | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| office-caltech | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| office-home | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| praxis gesture | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| r-pop | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| radnli | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| ravdess | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| recola | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| review | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| rg-65 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| rsna pneumonia | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| safety4all | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| savee | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| saganak dataset (downpour) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| sd dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| seempad | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| self-made dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| semeval-2022 multimedia automatic misogyny identification (mami) dataset<U+200B>. | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| sensitive images dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| sewa (automatic sentiment analysis in the wild) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| shinra2020-ml dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| slake | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| sogou news/text | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| spanish meacorpus 2023 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| speaking rate dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| stanford sentiment treebank (sst) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| svic+ | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| switchboard corpus (specifically the switchboard-nxt release) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| swt dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| t-st1 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| t-st2 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| t-vis | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| t4sa | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| tass 2017 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| the dataset used in the article is the fault records dataset from a factory, which includes both text sequence data and time series data associated with four different fault types. | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| the guardian | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| the main dataset used in this article is the silknow knowledge graph dataset. no other specific dataset names are mentioned in the article. | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| trafik kazasi dataset (traffic accident) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| trec question/text classification | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| tweets and images from users in english, spanish, and arabic. | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| twitter-15 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| university lecture recordings from universitat politècnica de valència (upv) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| uspto (united states patent and trademark office) patent dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| vad (voice activity detection) dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| valence dataset | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| vqa-rad | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| 1 | 0.5 | 1 (0.5%) | dis_datasets | |
| wiki-doc | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| ws-353 | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| yahoo answers | 1 | 0.5 | 1 (0.5%) | dis_datasets |
| yangin dataset (fire) | 1 | 0.5 | 1 (0.5%) | dis_datasets |
# One row per comparison-model mention: split the multi-line free-text
# answers, lower-case and trim each entry, and turn empty strings into NA.
multimodal_dis_df <- multimodal_df_renamed %>%
  select('l.p', 'dis_models') %>%
  separate_rows('dis_models', sep = "\\r?\\n") %>%
  mutate(
    dis_models = na_if(stringr::str_trim(stringr::str_to_lower(dis_models)), "")
  )
# Per-model mention counts, most frequent first; drop the NA group.
summary_df <- create_summary_df(multimodal_dis_df, 'dis_models') %>%
  filter(!is.na(group)) %>%
  arrange(desc(count))
# Lump models mentioned fewer than 20 times into "other", re-aggregate by
# the (possibly new) group, and recompute percentages/labels over the new
# totals, ordered by descending count.
summary_cuted_df <- summary_df %>%
  mutate(group = if_else(count < 20, "other", group)) %>%
  group_by(group, column) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(
    percentage = count / sum(count) * 100,
    label = paste0(count, " (", round(percentage, 1), "%)")
  ) %>%
  arrange(desc(count))
# Fix: same latent palette defect as the datasets treemap -- 7 hard-coded
# colours named by a group vector of arbitrary length yields NA-named
# entries on mismatch. Name only as many leading colours as there are
# groups (figure unchanged when the counts agree, as they do here).
model_groups <- unique(summary_cuted_df$group)
jco_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(7))(7)
global_palette <- stats::setNames(jco_colors[seq_along(model_groups)], model_groups)
# Treemap of the lumped model groups.
ggplot_chart <- create_treemap_chart(
  summary_df = summary_cuted_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11
)
ggsave("fig-mm-eval-models-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['dis_models']] <- ggplot_chart
ggplot_chart
# The table reports the full (un-lumped) counts.
knitr::kable(summary_df, caption = title_mapping['dis_models']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| multi-modal/multi-view specific architectures & techniques | 61 | 16.3 | 61 (16.3%) | dis_models |
| convolutional neural networks (cnn) | 41 | 10.9 | 41 (10.9%) | dis_models |
| traditional/statistical machine learning & other methods | 24 | 6.4 | 24 (6.4%) | dis_models |
| transformer-based architectures | 23 | 6.1 | 23 (6.1%) | dis_models |
| bert (bidirectional encoder representations from transformers) | 22 | 5.9 | 22 (5.9%) | dis_models |
| long short-term memory (lstm) | 20 | 5.3 | 20 (5.3%) | dis_models |
| vggnet | 16 | 4.3 | 16 (4.3%) | dis_models |
| word embeddings and text representation | 14 | 3.7 | 14 (3.7%) | dis_models |
| resnet | 13 | 3.5 | 13 (3.5%) | dis_models |
| support vector machine (svm) | 13 | 3.5 | 13 (3.5%) | dis_models |
| layoutlm family (document ai transformers) | 9 | 2.4 | 9 (2.4%) | dis_models |
| roberta (robustly optimized bert pretraining approach) | 8 | 2.1 | 8 (2.1%) | dis_models |
| alexnet | 7 | 1.9 | 7 (1.9%) | dis_models |
| googlenet | 6 | 1.6 | 6 (1.6%) | dis_models |
| self-attention / attention | 6 | 1.6 | 6 (1.6%) | dis_models |
| naive bayes | 5 | 1.3 | 5 (1.3%) | dis_models |
| neural network | 5 | 1.3 | 5 (1.3%) | dis_models |
| densenet | 4 | 1.1 | 4 (1.1%) | dis_models |
| random forest | 4 | 1.1 | 4 (1.1%) | dis_models |
| vision transformer (vit) | 4 | 1.1 | 4 (1.1%) | dis_models |
| boosting methods | 3 | 0.8 | 3 (0.8%) | dis_models |
| decision tree | 3 | 0.8 | 3 (0.8%) | dis_models |
| distilbert | 3 | 0.8 | 3 (0.8%) | dis_models |
| gated recurrent unit (gru) | 3 | 0.8 | 3 (0.8%) | dis_models |
| k-nearest neighbors (knn) | 3 | 0.8 | 3 (0.8%) | dis_models |
| logistic regression | 3 | 0.8 | 3 (0.8%) | dis_models |
| recurrent neural network (rnn) | 3 | 0.8 | 3 (0.8%) | dis_models |
| xlnet | 3 | 0.8 | 3 (0.8%) | dis_models |
| albert | 2 | 0.5 | 2 (0.5%) | dis_models |
| bigbird | 2 | 0.5 | 2 (0.5%) | dis_models |
| efficientnet | 2 | 0.5 | 2 (0.5%) | dis_models |
| tobert | 2 | 0.5 | 2 (0.5%) | dis_models |
| vilt | 2 | 0.5 | 2 (0.5%) | dis_models |
| <U+200B> | 2 | 0.5 | 2 (0.5%) | dis_models |
| . | 1 | 0.3 | 1 (0.3%) | dis_models |
| autoencoders | 1 | 0.3 | 1 (0.3%) | dis_models |
| bagging | 1 | 0.3 | 1 (0.3%) | dis_models |
| convolutional recurrent neural network (crnn) | 1 | 0.3 | 1 (0.3%) | dis_models |
| deep multi-level attentive network (dmlanet) | 1 | 0.3 | 1 (0.3%) | dis_models |
| deeppatent | 1 | 0.3 | 1 (0.3%) | dis_models |
| gr-electra | 1 | 0.3 | 1 (0.3%) | dis_models |
| graph neural networks (gnn) | 1 | 0.3 | 1 (0.3%) | dis_models |
| latent dirichlet allocation (lda) | 1 | 0.3 | 1 (0.3%) | dis_models |
| latent semantic analysis (lsa) | 1 | 0.3 | 1 (0.3%) | dis_models |
| linear model | 1 | 0.3 | 1 (0.3%) | dis_models |
| longformer | 1 | 0.3 | 1 (0.3%) | dis_models |
| lstm (word2vec) - cnn (mouzannar, 2018) | 1 | 0.3 | 1 (0.3%) | dis_models |
| maria | 1 | 0.3 | 1 (0.3%) | dis_models |
| mobilenet | 1 | 0.3 | 1 (0.3%) | dis_models |
| nasnet | 1 | 0.3 | 1 (0.3%) | dis_models |
| robert | 1 | 0.3 | 1 (0.3%) | dis_models |
| rocchio classifier (centroid classifier) | 1 | 0.3 | 1 (0.3%) | dis_models |
| transformer | 1 | 0.3 | 1 (0.3%) | dis_models |
| xgboost (xgb) | 1 | 0.3 | 1 (0.3%) | dis_models |
| <U+200B><U+200B> | 1 | 0.3 | 1 (0.3%) | dis_models |
# One row per performance-metric mention: split the multi-line free-text
# answers, lower-case and trim each entry, and turn empty strings into NA.
multimodal_dis_df <- multimodal_df_renamed %>%
  select('l.p', 'dis_metrics') %>%
  separate_rows('dis_metrics', sep = "\\r?\\n") %>%
  mutate(
    dis_metrics = na_if(stringr::str_trim(stringr::str_to_lower(dis_metrics)), "")
  )
# Per-metric mention counts, most frequent first; drop the NA group.
summary_df <- create_summary_df(multimodal_dis_df, 'dis_metrics') %>%
  filter(!is.na(group)) %>%
  arrange(desc(count))
# Lump metrics mentioned fewer than 11 times into "other", re-aggregate by
# the (possibly new) group, and recompute percentages/labels over the new
# totals, ordered by descending count.
summary_cuted_df <- summary_df %>%
  mutate(group = if_else(count < 11, "other", group)) %>%
  group_by(group, column) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  mutate(
    percentage = count / sum(count) * 100,
    label = paste0(count, " (", round(percentage, 1), "%)")
  ) %>%
  arrange(desc(count))
# Fix: here the palette defect is live -- only 6 groups survive the lumping
# (accuracy, f1-score, precision, recall, macro f1-score, other) but 7
# colours were generated, so setNames() left one palette entry with an NA
# name. Name only as many leading colours as there are groups; the colours
# actually used by the chart are unchanged.
metric_groups <- unique(summary_cuted_df$group)
jco_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(7))(7)
global_palette <- stats::setNames(jco_colors[seq_along(metric_groups)], metric_groups)
# Treemap of the lumped metric groups.
ggplot_chart <- create_treemap_chart(
  summary_df = summary_cuted_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11
)
ggsave("fig-mm-eval-per-met-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['dis_metrics']] <- ggplot_chart
ggplot_chart
# The table reports the full (un-lumped) counts.
knitr::kable(summary_df, caption = title_mapping['dis_metrics']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| accuracy | 53 | 25.4 | 53 (25.4%) | dis_metrics |
| f1-score | 35 | 16.7 | 35 (16.7%) | dis_metrics |
| precision | 25 | 12.0 | 25 (12%) | dis_metrics |
| recall | 25 | 12.0 | 25 (12%) | dis_metrics |
| macro f1-score | 11 | 5.3 | 11 (5.3%) | dis_metrics |
| computational and resource metrics | 5 | 2.4 | 5 (2.4%) | dis_metrics |
| area under the roc curve (auc/auroc) | 4 | 1.9 | 4 (1.9%) | dis_metrics |
| loss (general) | 4 | 1.9 | 4 (1.9%) | dis_metrics |
| nlp-specific metrics | 4 | 1.9 | 4 (1.9%) | dis_metrics |
| correlation coefficients | 3 | 1.4 | 3 (1.4%) | dis_metrics |
| macro recall | 3 | 1.4 | 3 (1.4%) | dis_metrics |
| variability and confidence metrics | 3 | 1.4 | 3 (1.4%) | dis_metrics |
| entropy/divergence metrics | 2 | 1.0 | 2 (1%) | dis_metrics |
| kappa statistics | 2 | 1.0 | 2 (1%) | dis_metrics |
| macro precision | 2 | 1.0 | 2 (1%) | dis_metrics |
| other specific metrics | 2 | 1.0 | 2 (1%) | dis_metrics |
| recall at k (r@k) | 2 | 1.0 | 2 (1%) | dis_metrics |
| unweighted average recall (uar) | 2 | 1.0 | 2 (1%) | dis_metrics |
| weighted f1-score | 2 | 1.0 | 2 (1%) | dis_metrics |
| balanced accuracy | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| computational efficiency | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| confusion matrix | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| distance and similarity metrics | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| macro-averaged scores | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean absolute percentage error (mape) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean r-precision (mrp) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean reciprocal rank (mrr) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| micro f1-score | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| micro precision | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| micro recall | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| testing error | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| validation ratio | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| weighted f1-score<U+200B> | 1 | 0.5 | 1 (0.5%) | dis_metrics |
# Combined figure: treemaps of the datasets, models and performance metrics
# used in multimodal works. The title row spans both columns and the models
# chart spans the full bottom row.
wrapped_title <- stringr::str_wrap("", width = 60)
title_grob <- grid::textGrob(
  wrapped_title,
  gp = grid::gpar(fontsize = 14, fontface = "plain"),
  just = "center"
)
layout <- rbind(
  c(1, 1), # title
  c(2, 3), # datasets | metrics
  c(4, 4)  # models
)
combined_plot <- gridExtra::grid.arrange(
  title_grob,
  GG_PLOTS_REGISTER[['dis_datasets']],
  GG_PLOTS_REGISTER[['dis_metrics']],
  GG_PLOTS_REGISTER[['dis_models']],
  layout_matrix = layout,
  heights = c(1, 10, 10) # tweak these ratios to taste
)
ggsave("fig-mm-datasets-models-met-pie.pdf", plot = combined_plot, width = 14, height = 8)
plot(combined_plot)
# Reset the chart registry for the evaluation/replication pie-chart section.
GG_PLOTS_REGISTER <- list()
# Rename the columns to make them easier to work with
# (the originals are the literal yes/no review-question texts).
multimodal_df_renamed <- multimodal_df %>%
rename(
test_set_used = 'Was a test set or cross-validation procedure used to determine the performance metrics values? Please write only yes, or no.',
statistical_tests = 'Were statistical tests used? Please write only yes, or no.',
other_analysis_methods = 'Did other methods of analyzing the outcomes, for example Bayesian, explainable machine learning methods, be used? Please write yes, or no.',
ablation_studies = 'Were ablation studies and/or comparisons with unimodal classifiers performed? Please write yes, or no.',
replication_info = 'Did the authors provide enough information (data, code) to allow for replication of the study? Please write only yes, or no.'
)
# Normalise the five yes/no review columns to exactly "Yes" or "No".
# Anything that is not a recognised "yes" (including NA and free text) is
# treated as "No" -- the same fall-through behaviour as the original
# case_when, whose "no"/NA/default branches all yielded "No".
yes_no_columns <- c(
  "test_set_used",
  "statistical_tests",
  "other_analysis_methods",
  "ablation_studies",
  "replication_info"
)
multimodal_df_clean <- multimodal_df_renamed %>%
  mutate(across(
    all_of(yes_no_columns),
    ~ if_else(tolower(.) %in% c("yes", "y"), "Yes", "No")
  ))
# Human-readable captions for each normalised column.
title_mapping <- c(
  "test_set_used" = "Test Set or Cross-Validation Used",
  "statistical_tests" = "Statistical Tests Used",
  "other_analysis_methods" = "Alternative Analysis Methods Used",
  "ablation_studies" = "Ablation Studies Performed",
  "replication_info" = "Replication Information Provided"
)
# Yes/No pie: was a test set or cross-validation used?
summary_df <- create_summary_df(multimodal_df_clean, 'test_set_used')
pie_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2)
global_palette <- stats::setNames(pie_colors, unique(summary_df$group))
ggplot_chart <- create_pie_chart(
  summary_df,
  title_mapping['test_set_used'],
  global_palette = global_palette
)
ggsave("fig-mm-eval-test-set-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['test_set_used']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['test_set_used']) %>%
  kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 5 | 7.4 | 5 (7.4%) | test_set_used |
| Yes | 63 | 92.6 | 63 (92.6%) | test_set_used |
# Yes/No pie: were statistical significance tests used?
summary_df <- create_summary_df(multimodal_df_clean, 'statistical_tests')
pie_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2)
global_palette <- stats::setNames(pie_colors, unique(summary_df$group))
ggplot_chart <- create_pie_chart(
  summary_df,
  title_mapping['statistical_tests'],
  global_palette = global_palette
)
ggsave("fig-mm-eval-stat-test-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['statistical_tests']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['statistical_tests']) %>%
  kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 60 | 88.2 | 60 (88.2%) | statistical_tests |
| Yes | 8 | 11.8 | 8 (11.8%) | statistical_tests |
# Yes/No pie: were alternative analysis methods (Bayesian, XAI, ...) used?
summary_df <- create_summary_df(multimodal_df_clean, 'other_analysis_methods')
pie_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2)
global_palette <- stats::setNames(pie_colors, unique(summary_df$group))
ggplot_chart <- create_pie_chart(
  summary_df,
  title_mapping['other_analysis_methods'],
  global_palette = global_palette
)
ggsave("fig-mm-eval-other-test-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['other_analysis_methods']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['other_analysis_methods']) %>%
  kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 66 | 97.1 | 66 (97.1%) | other_analysis_methods |
| Yes | 2 | 2.9 | 2 (2.9%) | other_analysis_methods |
# Yes/No pie: were ablation studies / unimodal comparisons performed?
summary_df <- create_summary_df(multimodal_df_clean, 'ablation_studies')
pie_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2)
global_palette <- stats::setNames(pie_colors, unique(summary_df$group))
ggplot_chart <- create_pie_chart(
  summary_df,
  title_mapping['ablation_studies'],
  global_palette = global_palette
)
ggsave("fig-mm-eval-ablation-study-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['ablation_studies']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['ablation_studies']) %>%
  kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 4 | 5.9 | 4 (5.9%) | ablation_studies |
| Yes | 64 | 94.1 | 64 (94.1%) | ablation_studies |
# Yes/No pie: did the authors provide enough data/code for replication?
summary_df <- create_summary_df(multimodal_df_clean, 'replication_info')
pie_colors <- grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2)
global_palette <- stats::setNames(pie_colors, unique(summary_df$group))
ggplot_chart <- create_pie_chart(
  summary_df,
  title_mapping['replication_info'],
  global_palette = global_palette
)
ggsave("fig-mm-eval-replication-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
GG_PLOTS_REGISTER[['replication_info']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['replication_info']) %>%
  kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 50 | 73.5 | 50 (73.5%) | replication_info |
| Yes | 18 | 26.5 | 18 (26.5%) | replication_info |
# Combined figure: the five evaluation & replication pie charts, arranged
# in a staggered 3x3 checkerboard (NA cells stay empty).
wrapped_title <- stringr::str_wrap("", width = 60)
title_grob <- grid::textGrob(
  wrapped_title,
  gp = grid::gpar(fontsize = 14, fontface = "plain"),
  just = "center"
)
layout <- rbind(
  c(1, NA, 2), # top: test set | - | statistical tests
  c(NA, 3, NA), # middle: - | other analysis methods | -
  c(4, NA, 5)  # bottom: ablation studies | - | replication info
)
combined_plot <- gridExtra::grid.arrange(
  GG_PLOTS_REGISTER[['test_set_used']],
  GG_PLOTS_REGISTER[['statistical_tests']],
  GG_PLOTS_REGISTER[['other_analysis_methods']],
  GG_PLOTS_REGISTER[['ablation_studies']],
  GG_PLOTS_REGISTER[['replication_info']],
  layout_matrix = layout
)
# Stack the (empty) title above the pie grid; the title row is kept small.
combined_plot <- gridExtra::grid.arrange(
  title_grob,
  combined_plot,
  ncol = 1,
  heights = c(0.5, 10)
)
ggsave("fig-mm-eval-test-set-tests-abl-rep-pie.pdf", plot = combined_plot, width = 14, height = 8)
plot(combined_plot)
# Restrict the full extraction sheet to the multi-view papers only.
multiview_df <- excel_df %>% filter(bibtexkey %in% MULTIVIEW_BIBTEXKEY)
head(multiview_df)
## # A tibble: 6 x 21
## l.p doi bibtexkey `Article title` `Short note` Please summarize or ~1
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 3 10.1109/A~ gui2021 "Technology Fo~ "The study ~ "The paper \"Technolo~
## 2 4 10.18653/~ huc2021 "One-class Tex~ "The study ~ "The paper \"One-clas~
## 3 8 10.1016/j~ zhang2021 "Learning sent~ "The study ~ "The paper titled \"L~
## 4 10 10.1016/j~ liang2021 "Fusion of het~ "The study ~ "The paper titled \"F~
## 5 12 10.1109/A~ sus2021 "A Dynamic Dis~ "The study ~ "The paper presents a~
## 6 13 10.1111/e~ zhao2023 "Topic identif~ "The study ~ "The paper \"Topic id~
## # i abbreviated name:
## # 1: `Please summarize or provide the most important details of the work. Use a maximum of five sentences.`
## # i 15 more variables: `What are the findings?` <chr>,
## # `What are the challenges?` <chr>,
## # `Identified the datasets used in the article` <chr>,
## # `Disambiguated datasets names` <chr>,
## # `What other models were selected for comparison?` <chr>, ...
# Report how many papers survived the multi-view filter.
sprintf("Extracted %d multiview papers out of %d total papers", nrow(multiview_df), nrow(excel_df))
## [1] "Extracted 73 multiview papers out of 139 total papers"
# Attach the publication year from the BibTeX table; keys are lower-cased on
# both sides so the join is case-insensitive. inner_join drops papers whose
# key has no match in bib_df.
year_multiview_df <- multiview_df %>%
mutate(bibtexkey = tolower(bibtexkey)) %>%
inner_join(
bib_df %>% mutate(BIBTEXKEY = tolower(BIBTEXKEY)) %>% select('BIBTEXKEY', 'YEAR'),
by = c("bibtexkey" = "BIBTEXKEY")
)
# Manual taxonomy: general-framework category -> bibtex keys of the papers
# assigned to it. Note a key may appear in more than one category
# (e.g. 'aminim2009' is in both the first two lists).
general_framework <- list()
general_framework[['When to use multi-view learning']] <- c('brefeldu2015', 'aminim2009')
general_framework[['Handling missing views']] <- c('zhangqi2024', 'doinychko2020', 'aminim2009')
general_framework[['Multi-view representation learning']] <- c('samya2023', 'sangy2022', 'jiax2021', 'liang2021', 'sus2021', 'yangp2014', 'zhang2021', 'maf2020', 'wangh2020', 'bhatt2019', 'chens2019', 'hoylea2019', 'wangh2019', 'ferreira2018', 'zhup2018', 'zhanz2017', 'perinaa2013', 'zhangb2013', 'zhangd2013', 'guyo2012', 'kovesim2012', 'yangp2012', 'zhengw2011', 'zhangb2008')
general_framework[['Multi-view data fusion']] <- c('fengz2024', 'jiz2024', 'xuy2024', 'varmanp2023', 'zhao2023', 'cgoncalves2022', 'liuj2022', 'luox2022', 'gui2021', 'huc2021', 'liuw2021', 'mmironczuk2020', 'mmironczuk2019', 'pengj2018', 'huz2017', 'sinorar2016', 'xuh2016', 'fakri2015', 'liuj2014', 'longg2013', 'lig2012', 'aminim2010', 'aminim2010b', 'suns2010', 'zhangx2010b', 'suns2008', 'matsubara2005', 'dasigiv2001')
# Counts per (category, year) for the bubble plot.
summary_df <- create_bubble_df(year_multiview_df, general_framework)
# NOTE(review): 5 colours are generated but the list defines 4 categories, so
# setNames leaves the 5th colour with an NA name — harmless for named lookup,
# but 4 would be cleaner.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(5))(5),
unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "General Framework",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "General Framework") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 5 | Handling missing views | 2009 | 1 |
| 25 | Handling missing views | 2020 | 1 |
| 33 | Handling missing views | 2024 | 1 |
| 1 | Multi-view data fusion | 2001 | 1 |
| 2 | Multi-view data fusion | 2005 | 1 |
| 3 | Multi-view data fusion | 2008 | 1 |
| 6 | Multi-view data fusion | 2009 | 1 |
| 8 | Multi-view data fusion | 2010 | 3 |
| 10 | Multi-view data fusion | 2012 | 1 |
| 12 | Multi-view data fusion | 2013 | 1 |
| 14 | Multi-view data fusion | 2014 | 1 |
| 16 | Multi-view data fusion | 2015 | 1 |
| 18 | Multi-view data fusion | 2016 | 2 |
| 19 | Multi-view data fusion | 2017 | 1 |
| 21 | Multi-view data fusion | 2018 | 1 |
| 23 | Multi-view data fusion | 2019 | 2 |
| 26 | Multi-view data fusion | 2020 | 1 |
| 28 | Multi-view data fusion | 2021 | 3 |
| 30 | Multi-view data fusion | 2022 | 4 |
| 34 | Multi-view data fusion | 2024 | 3 |
| 4 | Multi-view representation learning | 2008 | 1 |
| 9 | Multi-view representation learning | 2011 | 1 |
| 11 | Multi-view representation learning | 2012 | 3 |
| 13 | Multi-view representation learning | 2013 | 3 |
| 15 | Multi-view representation learning | 2014 | 1 |
| 20 | Multi-view representation learning | 2017 | 1 |
| 22 | Multi-view representation learning | 2018 | 2 |
| 24 | Multi-view representation learning | 2019 | 5 |
| 27 | Multi-view representation learning | 2020 | 1 |
| 29 | Multi-view representation learning | 2021 | 4 |
| 31 | Multi-view representation learning | 2022 | 1 |
| 32 | Multi-view representation learning | 2023 | 1 |
| 7 | When to use multi-view learning | 2009 | 1 |
| 17 | When to use multi-view learning | 2015 | 1 |
# Expand each (group, YEAR, n) row into n duplicate rows, so the per-group
# totals can be recomputed by the generic summariser below.
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the per-group totals; title/subtitle intentionally blank because
# the caption is added when this chart is composed into the combined figure.
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "General Framework") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Handling missing views | 3 | 5.3 | 3 (5.3%) | group |
| Multi-view data fusion | 28 | 49.1 | 28 (49.1%) | group |
| Multi-view representation learning | 24 | 42.1 | 24 (42.1%) | group |
| When to use multi-view learning | 2 | 3.5 | 2 (3.5%) | group |
# 1. Wrap the title at ~60 characters per line
# Treemap of the Distribution of Works on Multimodal–General Framework (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
guides(
fill = guide_legend(
ncol = 8,
byrow = TRUE,
keywidth = unit(0.6, "cm"),
keyheight = unit(0.6, "cm"),
title = "Group",
title.position = "top",
title.hjust = 0.5
)
) +
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 8),
legend.spacing.x = unit(0.15, "cm"),
legend.spacing.y = unit(0.1, "cm"),
legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
legend.box.background = element_rect(fill="transparent", colour = NA),
legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
)
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the legend ("guide-box") grob defensively: which() returns integer(0)
# when no grob matches, and subscripting with `[[` would then throw an opaque
# subscript error before any downstream is.null() guard could run.
fill_legend_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
if (length(fill_legend_idx) == 0) stop("Fill legend not found!")
fill_legend_grob <- g_fill$grobs[[fill_legend_idx[1]]]
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
guides(
fill = "none",
size = guide_legend(
title = "Count",
direction = "horizontal",
title.position = "top",
label.position = "bottom",
keywidth = unit(1.2, "cm"),
keyheight = unit(0.8, "cm"),
label.hjust = 0.5,
title.hjust = 0.5,
override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
)
) +
theme(
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 9),
legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
)
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Locate the legend ("guide-box") grob defensively: which() returns integer(0)
# when no grob matches, and `[[` on an empty index errors before the is.null()
# checks below would ever run.
size_legend_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
if (length(size_legend_idx) == 0) stop("Size legend not found!")
size_legend_grob <- g_size$grobs[[size_legend_idx[1]]]
# Check if legends were found (kept as a belt-and-braces sanity check)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
fill_legend_grob,
size_legend_grob,
ncol = 2,
widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
# Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
# (legends were already captured above, so the panels can be drawn clean).
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
theme(legend.position = "none",
plot.title = element_blank(),
plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout: treemap left, bubble plot right.
plots_row <- arrangeGrob(
ggplot_chart_no_legend,
bubble_plot_no_legend,
ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
# The "null" unit lets the plot row absorb whatever height remains after the
# title and legend rows take their fixed space. grid.arrange also draws to the
# active device as a side effect.
combined_plot_final <- grid.arrange(
title_grob,
plots_row,
combined_legends_grob,
ncol = 1,
heights = unit.c(title_height_est,
unit(1, "null"),
final_legend_panel_height)
)
ggsave(
"fig-mv-gen-frem-pie-bubble.pdf",
plot = combined_plot_final,
width = 14,
height = 8
)
# Manual taxonomy: application task -> bibtex keys of the papers assigned to
# it. A key may appear under several tasks (e.g. 'graffm2023', 'zhang2021',
# 'lij2020', 'tianl2023'), so task counts can exceed the number of papers.
tasks <- list()
tasks[['Finance']] <- c('zhao2023')
tasks[['Social media analysis']] <- c('graffm2023', 'tianl2023', 'karisanip2022')
tasks[['Hate speech and offensive language detection']] <- c('graffm2023', 'lij2020')
tasks[['Fake news detection']] <- c('varmanp2023')
tasks[['Emotion recognition']] <- c('zhang2021')
tasks[['Sentiment analysis']] <- c('zhang2021', 'hoylea2019')
tasks[['Author profiling']] <- c('carmona2020')
tasks[['Technology forecasting']] <- c('gui2021')
tasks[['Multilingual text categorization']] <- c('sus2021', 'doinychko2020', 'maf2020', 'bhatt2019', 'zhanz2017', 'rajendran2016', 'fakri2015', 'guyo2012', 'kovesim2012', 'aminim2010', 'aminim2010b', 'aminim2009')
tasks[['Adversarial text classification']] <- c('lij2020')
tasks[['Lobbying disclosure analysis']] <- c('liaox2015')
tasks[['Software document classification']] <- c('liuj2014')
tasks[['Short text classification']] <- c('luox2022', 'longg2013')
tasks[['Web document classification']] <- c('mmironczuk2020', 'mmironczuk2019', 'suns2010', 'chenb2009', 'gup2009', 'zhangx2009', 'suns2008')
tasks[['News Article Classification']] <- c('dasigiv2001')
tasks[['Biomedical document, article analysis']] <- c('cgoncalves2022')
tasks[['Healthcare']] <- c('liuj2022', 'huz2017', 'xuh2016')
tasks[['Personality Prediction']] <- c('sangy2022')
tasks[['Extreme multi-label text classification']] <- c('chens2019')
tasks[['Other multi-view tasks']] <- c('akhtiamov2019', 'ferreira2018')
# Counts per (task, year) for the bubble plot.
summary_df <- create_bubble_df(year_multiview_df, tasks)
# Extend the 10-colour JCO palette to 23 distinct shades by lightening and
# darkening the base colours.
base <- ggsci::pal_jco("default")(10)
extra <- c(lighten(base, 0.3), darken(base, 0.2))
palette_23 <- c(base, extra)[1:23]
# NOTE(review): the list above defines 20 tasks, so the last colours receive
# NA names — harmless for named lookup, but worth confirming intent.
global_palette <- stats::setNames(palette_23, unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "Tasks",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Tasks") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 22 | Adversarial text classification | 2020 | 1 |
| 23 | Author profiling | 2020 | 1 |
| 31 | Biomedical document, article analysis | 2022 | 1 |
| 27 | Emotion recognition | 2021 | 1 |
| 17 | Extreme multi-label text classification | 2019 | 1 |
| 32 | Fake news detection | 2022 | 1 |
| 24 | Finance | 2020 | 1 |
| 25 | Hate speech and offensive language detection | 2020 | 1 |
| 37 | Hate speech and offensive language detection | 2023 | 1 |
| 12 | Healthcare | 2016 | 1 |
| 14 | Healthcare | 2017 | 1 |
| 33 | Healthcare | 2022 | 1 |
| 10 | Lobbying disclosure analysis | 2015 | 1 |
| 3 | Multilingual text categorization | 2009 | 2 |
| 5 | Multilingual text categorization | 2010 | 1 |
| 7 | Multilingual text categorization | 2012 | 2 |
| 11 | Multilingual text categorization | 2015 | 1 |
| 13 | Multilingual text categorization | 2016 | 1 |
| 15 | Multilingual text categorization | 2017 | 1 |
| 18 | Multilingual text categorization | 2019 | 1 |
| 26 | Multilingual text categorization | 2020 | 2 |
| 28 | Multilingual text categorization | 2021 | 1 |
| 1 | News Article Classification | 2001 | 1 |
| 16 | Other multi-view tasks | 2018 | 1 |
| 19 | Other multi-view tasks | 2019 | 1 |
| 34 | Personality Prediction | 2022 | 1 |
| 20 | Sentiment analysis | 2019 | 1 |
| 29 | Sentiment analysis | 2021 | 1 |
| 8 | Short text classification | 2013 | 1 |
| 35 | Short text classification | 2022 | 1 |
| 36 | Social media analysis | 2022 | 1 |
| 38 | Social media analysis | 2023 | 2 |
| 9 | Software document classification | 2014 | 1 |
| 30 | Technology forecasting | 2021 | 1 |
| 2 | Web document classification | 2008 | 1 |
| 4 | Web document classification | 2009 | 3 |
| 6 | Web document classification | 2010 | 1 |
| 21 | Web document classification | 2019 | 2 |
# Expand each (group, YEAR, n) row into n duplicate rows, so the per-group
# totals can be recomputed by the generic summariser below.
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the per-task totals; title/subtitle intentionally blank because
# the caption is added when this chart is composed into the combined figure.
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Tasks") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Adversarial text classification | 1 | 2.2 | 1 (2.2%) | group |
| Author profiling | 1 | 2.2 | 1 (2.2%) | group |
| Biomedical document, article analysis | 1 | 2.2 | 1 (2.2%) | group |
| Emotion recognition | 1 | 2.2 | 1 (2.2%) | group |
| Extreme multi-label text classification | 1 | 2.2 | 1 (2.2%) | group |
| Fake news detection | 1 | 2.2 | 1 (2.2%) | group |
| Finance | 1 | 2.2 | 1 (2.2%) | group |
| Hate speech and offensive language detection | 2 | 4.4 | 2 (4.4%) | group |
| Healthcare | 3 | 6.7 | 3 (6.7%) | group |
| Lobbying disclosure analysis | 1 | 2.2 | 1 (2.2%) | group |
| Multilingual text categorization | 12 | 26.7 | 12 (26.7%) | group |
| News Article Classification | 1 | 2.2 | 1 (2.2%) | group |
| Other multi-view tasks | 2 | 4.4 | 2 (4.4%) | group |
| Personality Prediction | 1 | 2.2 | 1 (2.2%) | group |
| Sentiment analysis | 2 | 4.4 | 2 (4.4%) | group |
| Short text classification | 2 | 4.4 | 2 (4.4%) | group |
| Social media analysis | 3 | 6.7 | 3 (6.7%) | group |
| Software document classification | 1 | 2.2 | 1 (2.2%) | group |
| Technology forecasting | 1 | 2.2 | 1 (2.2%) | group |
| Web document classification | 7 | 15.6 | 7 (15.6%) | group |
# 1. Wrap the title at ~60 characters per line
# Treemap of the Distribution of Works on Multimodal–Task (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
guides(
fill = guide_legend(
ncol = 8,
byrow = TRUE,
keywidth = unit(0.6, "cm"),
keyheight = unit(0.6, "cm"),
title = "Group",
title.position = "top",
title.hjust = 0.5
)
) +
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 8),
legend.spacing.x = unit(0.15, "cm"),
legend.spacing.y = unit(0.1, "cm"),
legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
legend.box.background = element_rect(fill="transparent", colour = NA),
legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
)
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the legend ("guide-box") grob defensively: which() returns integer(0)
# when no grob matches, and subscripting with `[[` would then throw an opaque
# subscript error before any downstream is.null() guard could run.
fill_legend_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
if (length(fill_legend_idx) == 0) stop("Fill legend not found!")
fill_legend_grob <- g_fill$grobs[[fill_legend_idx[1]]]
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
guides(
fill = "none",
size = guide_legend(
title = "Count",
direction = "horizontal",
title.position = "top",
label.position = "bottom",
keywidth = unit(1.2, "cm"),
keyheight = unit(0.8, "cm"),
label.hjust = 0.5,
title.hjust = 0.5,
override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
)
) +
theme(
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 9),
legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
)
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Locate the legend ("guide-box") grob defensively: which() returns integer(0)
# when no grob matches, and `[[` on an empty index errors before the is.null()
# checks below would ever run.
size_legend_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
if (length(size_legend_idx) == 0) stop("Size legend not found!")
size_legend_grob <- g_size$grobs[[size_legend_idx[1]]]
# Check if legends were found (kept as a belt-and-braces sanity check)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
fill_legend_grob,
size_legend_grob,
ncol = 2,
widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
# Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
# (legends were already captured above, so the panels can be drawn clean).
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
theme(legend.position = "none",
plot.title = element_blank(),
plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout: treemap left, bubble plot right.
plots_row <- arrangeGrob(
ggplot_chart_no_legend,
bubble_plot_no_legend,
ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
# The "null" unit lets the plot row absorb whatever height remains after the
# title and legend rows take their fixed space. grid.arrange also draws to the
# active device as a side effect.
combined_plot_final <- grid.arrange(
title_grob,
plots_row,
combined_legends_grob,
ncol = 1,
heights = unit.c(title_height_est,
unit(1, "null"),
final_legend_panel_height)
)
ggsave(
"fig-mv-task-pie-bubble.pdf",
plot = combined_plot_final,
width = 14,
height = 8
)
# Manual taxonomy: classification-task type -> bibtex keys. A key may appear
# in more than one type (e.g. 'zhang2021' is in both multi-class and
# multi-label), so type counts can exceed the number of papers.
classification_tasks <- list()
classification_tasks[['One-class Classification']] <- c('huc2021', 'chenb2009')
classification_tasks[['Binary Classification']] <- c('graffm2023', 'tianl2023', 'varmanp2023', 'cgoncalves2022', 'karisanip2022', 'liuj2022', 'sangy2022', 'carmona2020', 'lij2020', 'mmironczuk2020', 'akhtiamov2019', 'mmironczuk2019', 'pengj2018', 'huz2017', 'xux2016', 'brefeldu2015', 'liuj2014', 'yangp2014', 'longg2013', 'zhangb2013', 'zhangd2013', 'guyo2012', 'lig2012', 'yangp2012', 'aminim2010', 'aminim2010b', 'suns2010', 'aminim2009', 'suns2008', 'zhangb2008', 'matsubara2005')
classification_tasks[['Multi-class Classification']] <- c('jiz2024', 'xuy2024', 'zhangqi2024', 'samya2023', 'zhao2023', 'luox2022', 'gui2021', 'jiax2021', 'liang2021', 'sus2021', 'zhang2021', 'doinychko2020', 'maf2020', 'max2020', 'wangh2020', 'bhatt2019', 'hey2019', 'hoylea2019', 'wangh2019', 'ferreira2018', 'xuc2017', 'zhanz2017', 'iglesias2016', 'rajendran2016', 'sinorar2016', 'xuh2016', 'fakri2015', 'liy2013', 'perinaa2013', 'kovesim2012', 'zhengw2011', 'zhangx2010b', 'gup2009', 'zhangx2009', 'dasigiv2001')
classification_tasks[['Multi-label Classification']] <- c('fengz2024', 'liuw2021', 'zhang2021', 'chens2019', 'zhup2018', 'liaox2015')
# Counts per (type, year) for the bubble plot.
summary_df <- create_bubble_df(year_multiview_df, classification_tasks)
# NOTE(review): 6 colours are generated for the 4 types defined above; the
# extra colours get NA names — harmless for named lookup.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(6))(6),
unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "Type of Classification Task",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Type of Classification Task") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 2 | Binary Classification | 2005 | 1 |
| 3 | Binary Classification | 2008 | 2 |
| 4 | Binary Classification | 2009 | 2 |
| 7 | Binary Classification | 2010 | 2 |
| 10 | Binary Classification | 2012 | 3 |
| 12 | Binary Classification | 2013 | 3 |
| 14 | Binary Classification | 2014 | 2 |
| 15 | Binary Classification | 2015 | 1 |
| 18 | Binary Classification | 2016 | 1 |
| 20 | Binary Classification | 2017 | 1 |
| 22 | Binary Classification | 2018 | 1 |
| 25 | Binary Classification | 2019 | 3 |
| 28 | Binary Classification | 2020 | 2 |
| 33 | Binary Classification | 2022 | 5 |
| 35 | Binary Classification | 2023 | 2 |
| 1 | Multi-class Classification | 2001 | 1 |
| 5 | Multi-class Classification | 2009 | 2 |
| 8 | Multi-class Classification | 2010 | 1 |
| 9 | Multi-class Classification | 2011 | 1 |
| 11 | Multi-class Classification | 2012 | 1 |
| 13 | Multi-class Classification | 2013 | 2 |
| 16 | Multi-class Classification | 2015 | 1 |
| 19 | Multi-class Classification | 2016 | 4 |
| 21 | Multi-class Classification | 2017 | 2 |
| 23 | Multi-class Classification | 2018 | 1 |
| 26 | Multi-class Classification | 2019 | 5 |
| 29 | Multi-class Classification | 2020 | 4 |
| 30 | Multi-class Classification | 2021 | 5 |
| 34 | Multi-class Classification | 2022 | 1 |
| 36 | Multi-class Classification | 2023 | 1 |
| 37 | Multi-class Classification | 2024 | 3 |
| 17 | Multi-label Classification | 2015 | 1 |
| 24 | Multi-label Classification | 2018 | 1 |
| 27 | Multi-label Classification | 2019 | 1 |
| 31 | Multi-label Classification | 2021 | 2 |
| 38 | Multi-label Classification | 2024 | 1 |
| 6 | One-class Classification | 2009 | 1 |
| 32 | One-class Classification | 2021 | 1 |
# Expand each (group, YEAR, n) row into n duplicate rows, so the per-group
# totals can be recomputed by the generic summariser below.
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the per-type totals; title/subtitle intentionally blank because
# the caption is added when this chart is composed into the combined figure.
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Type of Classification Task") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Binary Classification | 31 | 41.9 | 31 (41.9%) | group |
| Multi-class Classification | 35 | 47.3 | 35 (47.3%) | group |
| Multi-label Classification | 6 | 8.1 | 6 (8.1%) | group |
| One-class Classification | 2 | 2.7 | 2 (2.7%) | group |
# 1. Wrap the title
# Treemap of the Distribution of Works on Type of Classification Task (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
"",
width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
guides(
fill = guide_legend(
ncol = 8,
byrow = TRUE,
keywidth = unit(0.6, "cm"),
keyheight = unit(0.6, "cm"),
title = "Group",
title.position = "top",
title.hjust = 0.5
)
) +
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 8),
legend.spacing.x = unit(0.15, "cm"),
legend.spacing.y = unit(0.1, "cm"),
legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
legend.box.background = element_rect(fill="transparent", colour = NA),
legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
)
g_fill <- ggplotGrob(treemap_for_fill_legend)
# Locate the legend ("guide-box") grob defensively: which() returns integer(0)
# when no grob matches, and subscripting with `[[` would then throw an opaque
# subscript error before any downstream is.null() guard could run.
fill_legend_idx <- which(vapply(g_fill$grobs, function(x) x$name, character(1)) == "guide-box")
if (length(fill_legend_idx) == 0) stop("Fill legend not found!")
fill_legend_grob <- g_fill$grobs[[fill_legend_idx[1]]]
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
guides(
fill = "none",
size = guide_legend(
title = "Count",
direction = "horizontal",
title.position = "top",
label.position = "bottom",
keywidth = unit(1.2, "cm"),
keyheight = unit(0.8, "cm"),
label.hjust = 0.5,
title.hjust = 0.5,
override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
)
) +
theme(
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "horizontal",
legend.title = element_text(size = 10, face = "plain"),
legend.text = element_text(size = 9),
legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
)
g_size <- ggplotGrob(bubble_plot_for_size_legend)
# Locate the legend ("guide-box") grob defensively: which() returns integer(0)
# when no grob matches, and `[[` on an empty index errors before the is.null()
# checks below would ever run.
size_legend_idx <- which(vapply(g_size$grobs, function(x) x$name, character(1)) == "guide-box")
if (length(size_legend_idx) == 0) stop("Size legend not found!")
size_legend_grob <- g_size$grobs[[size_legend_idx[1]]]
# Check if legends were found (kept as a belt-and-braces sanity check)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
fill_legend_grob,
size_legend_grob,
ncol = 2,
widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
# Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
# (legends were already captured above, so the panels can be drawn clean).
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
theme(legend.position = "none",
plot.title = element_blank(),
plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout: treemap left, bubble plot right.
plots_row <- arrangeGrob(
ggplot_chart_no_legend,
bubble_plot_no_legend,
ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
# The "null" unit lets the plot row absorb whatever height remains after the
# title and legend rows take their fixed space. grid.arrange also draws to the
# active device as a side effect.
combined_plot_final <- grid.arrange(
title_grob,
plots_row,
combined_legends_grob,
ncol = 1,
heights = unit.c(title_height_est,
unit(1, "null"),
final_legend_panel_height)
)
ggsave(
"fig-mv-class-task-pie-bubble.pdf",
plot = combined_plot_final,
width = 14,
height = 8
)
# Manual taxonomy: learning paradigm -> bibtex keys. A key may appear in
# several paradigms (e.g. 'aminim2009' is in both supervised and
# semi-supervised; 'tianl2023' is in meta- and few-shot learning), so paradigm
# counts can exceed the number of papers.
learning_paradigms <- list()
learning_paradigms[['Supervised Learning']] <- c('fengz2024', 'graffm2023', 'varmanp2023', 'zhao2023', 'cgoncalves2022', 'luox2022', 'gui2021', 'liang2021', 'liuw2021', 'sus2021', 'zhang2021', 'carmona2020', 'mmironczuk2020', 'akhtiamov2019', 'bhatt2019', 'chens2019', 'mmironczuk2019', 'ferreira2018', 'pengj2018', 'zhup2018', 'huz2017', 'xuc2017', 'zhanz2017', 'sinorar2016', 'xuh2016', 'liaox2015', 'liuj2014', 'liy2013', 'zhangd2013', 'kovesim2012', 'zhengw2011', 'aminim2010', 'zhangx2010b', 'aminim2009', 'dasigiv2001')
learning_paradigms[['Semi-supervised Learning']] <- c('jiz2024', 'zhangqi2024', 'huc2021', 'jiax2021', 'maf2020', 'iglesias2016', 'xux2016', 'brefeldu2015', 'fakri2015', 'longg2013', 'guyo2012', 'lig2012', 'aminim2010b', 'aminim2009', 'chenb2009', 'gup2009', 'suns2008', 'zhangb2008', 'matsubara2005')
learning_paradigms[['Active Learning']] <- c('jiz2024', 'karisanip2022', 'liuj2022', 'suns2010', 'gup2009', 'zhangx2009', 'suns2008')
learning_paradigms[['Transfer Learning']] <- c('fengz2024', 'bhatt2019', 'hey2019', 'rajendran2016', 'yangp2014', 'zhangb2013', 'yangp2012')
learning_paradigms[['Contrastive Learning']] <- c('samya2023')
learning_paradigms[['Graph Learning']] <- c('xuy2024')
learning_paradigms[['Meta Learning']] <- c('tianl2023')
learning_paradigms[['Few-shot Learning']] <- c('tianl2023')
learning_paradigms[['Adversarial Learning']] <- c('doinychko2020', 'lij2020')
# Counts per (paradigm, year) for the bubble plot; 9 colours for the 9
# paradigms defined above.
summary_df <- create_bubble_df(year_multiview_df, learning_paradigms)
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(9))(9),
unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
title = "Learning Paradigms",
subtitle = "Distribution of papers by category and publication year",
global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Learning Paradigms") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 3 | Active Learning | 2008 | 1 |
| 5 | Active Learning | 2009 | 2 |
| 8 | Active Learning | 2010 | 1 |
| 34 | Active Learning | 2022 | 2 |
| 40 | Active Learning | 2024 | 1 |
| 29 | Adversarial Learning | 2020 | 2 |
| 36 | Contrastive Learning | 2023 | 1 |
| 37 | Few-shot Learning | 2023 | 1 |
| 41 | Graph Learning | 2024 | 1 |
| 38 | Meta Learning | 2023 | 1 |
| 2 | Semi-supervised Learning | 2005 | 1 |
| 4 | Semi-supervised Learning | 2008 | 2 |
| 6 | Semi-supervised Learning | 2009 | 3 |
| 9 | Semi-supervised Learning | 2010 | 1 |
| 12 | Semi-supervised Learning | 2012 | 2 |
| 15 | Semi-supervised Learning | 2013 | 1 |
| 20 | Semi-supervised Learning | 2015 | 2 |
| 22 | Semi-supervised Learning | 2016 | 2 |
| 30 | Semi-supervised Learning | 2020 | 1 |
| 32 | Semi-supervised Learning | 2021 | 2 |
| 42 | Semi-supervised Learning | 2024 | 2 |
| 1 | Supervised Learning | 2001 | 1 |
| 7 | Supervised Learning | 2009 | 2 |
| 10 | Supervised Learning | 2010 | 1 |
| 11 | Supervised Learning | 2011 | 1 |
| 13 | Supervised Learning | 2012 | 1 |
| 16 | Supervised Learning | 2013 | 2 |
| 18 | Supervised Learning | 2014 | 1 |
| 21 | Supervised Learning | 2015 | 1 |
| 23 | Supervised Learning | 2016 | 2 |
| 25 | Supervised Learning | 2017 | 3 |
| 26 | Supervised Learning | 2018 | 3 |
| 27 | Supervised Learning | 2019 | 5 |
| 31 | Supervised Learning | 2020 | 2 |
| 33 | Supervised Learning | 2021 | 5 |
| 35 | Supervised Learning | 2022 | 3 |
| 39 | Supervised Learning | 2023 | 1 |
| 43 | Supervised Learning | 2024 | 1 |
| 14 | Transfer Learning | 2012 | 1 |
| 17 | Transfer Learning | 2013 | 1 |
| 19 | Transfer Learning | 2014 | 1 |
| 24 | Transfer Learning | 2016 | 1 |
| 28 | Transfer Learning | 2019 | 2 |
| 44 | Transfer Learning | 2024 | 1 |
# Expand each (group, YEAR, n) row into n duplicate rows, so the per-group
# totals can be recomputed by the generic summariser below.
unpacked_df <- summary_df[rep(row.names(summary_df), summary_df$n), c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the per-paradigm totals; title/subtitle intentionally blank
# because the caption is added when composed into the combined figure.
ggplot_chart <- create_treemap_chart(
summary_df = unpacked_summary_df,
title = "",
subtitle = "",
global_palette = global_palette,
text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Learning Paradigms") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Active Learning | 7 | 9.5 | 7 (9.5%) | group |
| Adversarial Learning | 2 | 2.7 | 2 (2.7%) | group |
| Contrastive Learning | 1 | 1.4 | 1 (1.4%) | group |
| Few-shot Learning | 1 | 1.4 | 1 (1.4%) | group |
| Graph Learning | 1 | 1.4 | 1 (1.4%) | group |
| Meta Learning | 1 | 1.4 | 1 (1.4%) | group |
| Semi-supervised Learning | 19 | 25.7 | 19 (25.7%) | group |
| Supervised Learning | 35 | 47.3 | 35 (47.3%) | group |
| Transfer Learning | 7 | 9.5 | 7 (9.5%) | group |
# 1. Wrap the title
# Treemap of the Distribution of Works on Learning Paradigms (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# Helper: pull the legend ("guide-box") grob out of a gtable produced by
# ggplotGrob(). Returns NULL when no legend is present so the checks below can
# stop with a clear message; the previous `grobs[[which(...)]]` form raised a
# cryptic "subscript out of bounds" error before the is.null() guards could run.
extract_guide_box <- function(gtab) {
  grob_names <- vapply(gtab$grobs, function(g) g$name, character(1))
  idx <- which(grob_names == "guide-box")
  if (length(idx) == 0) {
    # ggplot2 >= 3.5 names the legend boxes by position; with
    # legend.position = "bottom" the populated one is "guide-box-bottom".
    idx <- which(grob_names == "guide-box-bottom")
  }
  if (length(idx) == 0) {
    return(NULL)
  }
  gtab$grobs[[idx[1]]]
}
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill="transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
fill_legend_grob <- extract_guide_box(g_fill)
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
size_legend_grob <- extract_guide_box(g_size)
# Check if legends were found (extract_guide_box() returns NULL on failure)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
  # Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mv-learn-para-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Categorise the surveyed papers by fusion strategy (citation keys per group).
fusion_strategies <- list()
fusion_strategies[['Early Fusion']] <- c('jiz2024', 'xuy2024', 'zhangqi2024', 'samya2023', 'varmanp2023', 'luox2022', 'sangy2022', 'huc2021', 'jiax2021', 'liang2021', 'liuw2021', 'sus2021', 'zhang2021', 'carmona2020', 'lij2020', 'max2020', 'wangh2020', 'bhatt2019', 'wangh2019', 'ferreira2018', 'huz2017', 'xuc2017', 'zhanz2017', 'xuh2016', 'liy2013', 'zhangb2013', 'lig2012', 'zhengw2011', 'dasigiv2001')
fusion_strategies[['Late Fusion']] <- c('graffm2023', 'tianl2023', 'cgoncalves2022', 'gui2021', 'mmironczuk2020', 'akhtiamov2019', 'hoylea2019', 'mmironczuk2019', 'pengj2018', 'zhup2018', 'iglesias2016', 'sinorar2016', 'brefeldu2015', 'fakri2015', 'liaox2015', 'longg2013', 'kovesim2012', 'suns2010', 'zhangx2010b', 'zhangx2009', 'suns2008', 'zhangb2008', 'matsubara2005')
fusion_strategies[['Hybrid Fusion']] <- c('fengz2024','zhao2023', 'karisanip2022', 'liuj2022', 'doinychko2020', 'maf2020', 'chens2019', 'hey2019', 'rajendran2016', 'xux2016', 'liuj2014', 'zhangd2013', 'guyo2012', 'aminim2010', 'aminim2010b', 'chenb2009', 'gup2009')
summary_df <- create_bubble_df(year_multiview_df, fusion_strategies)
# One named colour per group. The ramp length was previously hard-coded to 6,
# which makes setNames() error whenever there are more than 6 groups and
# leaves unnamed trailing colours otherwise. Size it from the data instead;
# for up to 6 groups the assigned colours are identical to before.
n_groups <- length(unique(summary_df$group))
palette_colours <- grDevices::colorRampPalette(ggsci::pal_jco("default")(6))(max(n_groups, 6L))
global_palette <- stats::setNames(palette_colours[seq_len(n_groups)], unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
                                  title = "Fusion Strategies",
                                  subtitle = "Distribution of papers by category and publication year",
                                  global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Fusion Strategies") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 1 | Early Fusion | 2001 | 1 |
| 8 | Early Fusion | 2011 | 1 |
| 9 | Early Fusion | 2012 | 1 |
| 12 | Early Fusion | 2013 | 2 |
| 17 | Early Fusion | 2016 | 1 |
| 20 | Early Fusion | 2017 | 3 |
| 21 | Early Fusion | 2018 | 1 |
| 23 | Early Fusion | 2019 | 3 |
| 26 | Early Fusion | 2020 | 3 |
| 28 | Early Fusion | 2021 | 6 |
| 30 | Early Fusion | 2022 | 3 |
| 33 | Early Fusion | 2023 | 1 |
| 35 | Early Fusion | 2024 | 3 |
| 4 | Hybrid Fusion | 2009 | 3 |
| 6 | Hybrid Fusion | 2010 | 1 |
| 10 | Hybrid Fusion | 2012 | 1 |
| 13 | Hybrid Fusion | 2013 | 1 |
| 15 | Hybrid Fusion | 2014 | 1 |
| 18 | Hybrid Fusion | 2016 | 2 |
| 24 | Hybrid Fusion | 2019 | 2 |
| 27 | Hybrid Fusion | 2020 | 3 |
| 31 | Hybrid Fusion | 2022 | 2 |
| 36 | Hybrid Fusion | 2024 | 1 |
| 2 | Late Fusion | 2005 | 1 |
| 3 | Late Fusion | 2008 | 2 |
| 5 | Late Fusion | 2009 | 1 |
| 7 | Late Fusion | 2010 | 2 |
| 11 | Late Fusion | 2012 | 1 |
| 14 | Late Fusion | 2013 | 1 |
| 16 | Late Fusion | 2015 | 3 |
| 19 | Late Fusion | 2016 | 2 |
| 22 | Late Fusion | 2018 | 2 |
| 25 | Late Fusion | 2019 | 4 |
| 29 | Late Fusion | 2021 | 1 |
| 32 | Late Fusion | 2022 | 1 |
| 34 | Late Fusion | 2023 | 2 |
# Expand the per-year counts back to one row per paper so the treemap helper
# can re-aggregate by group alone.
expanded_rows <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[expanded_rows, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the share of papers per fusion strategy (shared palette with the
# bubble plot built above).
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Fusion Strategies") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Early Fusion | 29 | 42.0 | 29 (42%) | group |
| Hybrid Fusion | 17 | 24.6 | 17 (24.6%) | group |
| Late Fusion | 23 | 33.3 | 23 (33.3%) | group |
# 1. Wrap the title
# Treemap of the Distribution of Works on Fusion Strategies (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# Helper: pull the legend ("guide-box") grob out of a gtable produced by
# ggplotGrob(). Returns NULL when no legend is present so the checks below can
# stop with a clear message; the previous `grobs[[which(...)]]` form raised a
# cryptic "subscript out of bounds" error before the is.null() guards could run.
extract_guide_box <- function(gtab) {
  grob_names <- vapply(gtab$grobs, function(g) g$name, character(1))
  idx <- which(grob_names == "guide-box")
  if (length(idx) == 0) {
    # ggplot2 >= 3.5 names the legend boxes by position; with
    # legend.position = "bottom" the populated one is "guide-box-bottom".
    idx <- which(grob_names == "guide-box-bottom")
  }
  if (length(idx) == 0) {
    return(NULL)
  }
  gtab$grobs[[idx[1]]]
}
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill="transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
fill_legend_grob <- extract_guide_box(g_fill)
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
size_legend_grob <- extract_guide_box(g_size)
# Check if legends were found (extract_guide_box() returns NULL on failure)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
  # Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mv-fusion-start-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Categorise the surveyed papers by fusion technique (citation keys per group).
# Note: a paper may appear in more than one technique (e.g. 'fengz2024').
fusion_techniques <- list()
fusion_techniques[['Machine Learning based Fusion']] <- c('graffm2023', 'zhao2023', 'cgoncalves2022', 'huc2021', 'sus2021', 'carmona2020', 'mmironczuk2020', 'wangh2020', 'akhtiamov2019', 'bhatt2019', 'hey2019', 'mmironczuk2019', 'wangh2019', 'pengj2018', 'zhup2018', 'zhanz2017', 'sinorar2016', 'xux2016', 'longg2013', 'zhangb2013', 'zhangd2013', 'lig2012', 'zhengw2011', 'suns2010', 'zhangx2009', 'suns2008')
fusion_techniques[['Probabilistic Fusion']] <- c('fengz2024', 'hoylea2019', 'iglesias2016', 'brefeldu2015', 'fakri2015', 'perinaa2013', 'aminim2010', 'aminim2010b', 'zhangx2010b', 'gup2009', 'zhangb2008')
fusion_techniques[['Neural Network Fusion']] <- c('jiz2024', 'zhangqi2024', 'tianl2023', 'varmanp2023', 'liuj2022', 'luox2022', 'gui2021', 'jiax2021', 'liuw2021', 'doinychko2020', 'lij2020', 'max2020', 'chens2019', 'huz2017', 'xuc2017', 'rajendran2016', 'xuh2016', 'dasigiv2001')
fusion_techniques[['Attention Fusion']] <- c('sangy2022', 'liang2021', 'zhang2021')
fusion_techniques[['Graph Fusion']] <- c('fengz2024', 'jiz2024', 'xuy2024', 'samya2023', 'lig2012')
summary_df <- create_bubble_df(year_multiview_df, fusion_techniques)
# One named colour per group. The ramp length was previously hard-coded to 6,
# which makes setNames() error whenever there are more than 6 groups and
# leaves unnamed trailing colours otherwise. Size it from the data instead;
# for up to 6 groups the assigned colours are identical to before.
n_groups <- length(unique(summary_df$group))
palette_colours <- grDevices::colorRampPalette(ggsci::pal_jco("default")(6))(max(n_groups, 6L))
global_palette <- stats::setNames(palette_colours[seq_len(n_groups)], unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
                                  title = "Fusion Techniques",
                                  subtitle = "Distribution of papers by category and publication year",
                                  global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Fusion Techniques") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 25 | Attention Fusion | 2021 | 2 |
| 28 | Attention Fusion | 2022 | 1 |
| 9 | Graph Fusion | 2012 | 1 |
| 31 | Graph Fusion | 2023 | 1 |
| 34 | Graph Fusion | 2024 | 3 |
| 2 | Machine Learning based Fusion | 2008 | 1 |
| 4 | Machine Learning based Fusion | 2009 | 1 |
| 6 | Machine Learning based Fusion | 2010 | 1 |
| 8 | Machine Learning based Fusion | 2011 | 1 |
| 10 | Machine Learning based Fusion | 2012 | 1 |
| 11 | Machine Learning based Fusion | 2013 | 3 |
| 14 | Machine Learning based Fusion | 2016 | 2 |
| 17 | Machine Learning based Fusion | 2017 | 1 |
| 19 | Machine Learning based Fusion | 2018 | 2 |
| 20 | Machine Learning based Fusion | 2019 | 7 |
| 23 | Machine Learning based Fusion | 2020 | 2 |
| 26 | Machine Learning based Fusion | 2021 | 2 |
| 29 | Machine Learning based Fusion | 2022 | 1 |
| 32 | Machine Learning based Fusion | 2023 | 1 |
| 1 | Neural Network Fusion | 2001 | 1 |
| 15 | Neural Network Fusion | 2016 | 2 |
| 18 | Neural Network Fusion | 2017 | 2 |
| 21 | Neural Network Fusion | 2019 | 1 |
| 24 | Neural Network Fusion | 2020 | 3 |
| 27 | Neural Network Fusion | 2021 | 3 |
| 30 | Neural Network Fusion | 2022 | 3 |
| 33 | Neural Network Fusion | 2023 | 1 |
| 35 | Neural Network Fusion | 2024 | 2 |
| 3 | Probabilistic Fusion | 2008 | 1 |
| 5 | Probabilistic Fusion | 2009 | 2 |
| 7 | Probabilistic Fusion | 2010 | 2 |
| 12 | Probabilistic Fusion | 2013 | 1 |
| 13 | Probabilistic Fusion | 2015 | 2 |
| 16 | Probabilistic Fusion | 2016 | 1 |
| 22 | Probabilistic Fusion | 2019 | 1 |
| 36 | Probabilistic Fusion | 2024 | 1 |
# Expand the per-year counts back to one row per paper so the treemap helper
# can re-aggregate by group alone.
expanded_rows <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[expanded_rows, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the share of papers per fusion technique (shared palette with the
# bubble plot built above).
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Fusion Techniques") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Attention Fusion | 3 | 4.8 | 3 (4.8%) | group |
| Graph Fusion | 5 | 7.9 | 5 (7.9%) | group |
| Machine Learning based Fusion | 26 | 41.3 | 26 (41.3%) | group |
| Neural Network Fusion | 18 | 28.6 | 18 (28.6%) | group |
| Probabilistic Fusion | 11 | 17.5 | 11 (17.5%) | group |
# 1. Wrap the title
# Treemap of the Distribution of Works on Fusion Techniques (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 70
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# Helper: pull the legend ("guide-box") grob out of a gtable produced by
# ggplotGrob(). Returns NULL when no legend is present so the checks below can
# stop with a clear message; the previous `grobs[[which(...)]]` form raised a
# cryptic "subscript out of bounds" error before the is.null() guards could run.
extract_guide_box <- function(gtab) {
  grob_names <- vapply(gtab$grobs, function(g) g$name, character(1))
  idx <- which(grob_names == "guide-box")
  if (length(idx) == 0) {
    # ggplot2 >= 3.5 names the legend boxes by position; with
    # legend.position = "bottom" the populated one is "guide-box-bottom".
    idx <- which(grob_names == "guide-box-bottom")
  }
  if (length(idx) == 0) {
    return(NULL)
  }
  gtab$grobs[[idx[1]]]
}
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill="transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
fill_legend_grob <- extract_guide_box(g_fill)
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
size_legend_grob <- extract_guide_box(g_size)
# Check if legends were found (extract_guide_box() returns NULL on failure)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
  # Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mv-fusion-tech-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Categorise the surveyed papers by the kind of dataset they evaluate on
# (citation keys per group; a paper may appear in more than one group).
datasets <- list()
datasets[['Public Benchmark Datasets']] <- c('fengz2024', 'jiz2024', 'xuy2024', 'zhangqi2024', 'samya2023', 'varmanp2023', 'luox2022', 'huc2021', 'jiax2021', 'liang2021', 'liuw2021', 'zhang2021', 'doinychko2020', 'max2020', 'wangh2020', 'bhatt2019', 'chens2019', 'hey2019', 'hoylea2019', 'wangh2019', 'ferreira2018', 'pengj2018', 'zhup2018', 'xuc2017', 'iglesias2016', 'rajendran2016', 'sinorar2016', 'xuh2016', 'xux2016', 'brefeldu2015', 'fakri2015', 'liuj2014', 'yangp2014', 'liy2013', 'longg2013', 'perinaa2013', 'zhangb2013', 'zhangd2013', 'yangp2012', 'zhengw2011', 'aminim2010', 'aminim2010b', 'aminim2009', 'chenb2009', 'gup2009', 'zhangx2009', 'zhangb2008', 'matsubara2005', 'dasigiv2001')
datasets[['Domain-specific Datasets']] <- c('graffm2023', 'zhao2023', 'cgoncalves2022', 'liuj2022', 'sangy2022', 'gui2021', 'carmona2020', 'lij2020', 'mmironczuk2020', 'akhtiamov2019', 'mmironczuk2019', 'huz2017', 'liaox2015', 'suns2010', 'suns2008')
# NOTE(review): 'maf2020' was previously listed twice in this vector, inflating
# the 2020 multilingual count; the duplicate has been dropped. Verify whether
# the second entry was meant to be a different citation key (e.g. 'maf2020b').
datasets[['Multilingual Datasets']] <- c('tianl2023', 'sus2021', 'carmona2020', 'maf2020', 'bhatt2019', 'zhanz2017', 'guyo2012', 'kovesim2012', 'aminim2010', 'aminim2010b', 'aminim2009')
summary_df <- create_bubble_df(year_multiview_df, datasets)
# One named colour per group. The ramp length was previously hard-coded to 6,
# which makes setNames() error whenever there are more than 6 groups and
# leaves unnamed trailing colours otherwise. Size it from the data instead;
# for up to 6 groups the assigned colours are identical to before.
n_groups <- length(unique(summary_df$group))
palette_colours <- grDevices::colorRampPalette(ggsci::pal_jco("default")(6))(max(n_groups, 6L))
global_palette <- stats::setNames(palette_colours[seq_len(n_groups)], unique(summary_df$group))
bubble_plot <- create_bubble_plot(summary_df,
                                  title = "Datasets",
                                  subtitle = "Distribution of papers by category and publication year",
                                  global_palette = global_palette)
bubble_plot
knitr::kable(summary_df, caption = "Datasets") %>% kableExtra::kable_styling()
| group | YEAR | n | |
|---|---|---|---|
| 3 | Domain-specific Datasets | 2008 | 1 |
| 7 | Domain-specific Datasets | 2010 | 1 |
| 15 | Domain-specific Datasets | 2015 | 1 |
| 18 | Domain-specific Datasets | 2017 | 1 |
| 22 | Domain-specific Datasets | 2019 | 3 |
| 25 | Domain-specific Datasets | 2020 | 3 |
| 28 | Domain-specific Datasets | 2021 | 1 |
| 31 | Domain-specific Datasets | 2022 | 3 |
| 33 | Domain-specific Datasets | 2023 | 1 |
| 5 | Multilingual Datasets | 2009 | 2 |
| 8 | Multilingual Datasets | 2010 | 1 |
| 11 | Multilingual Datasets | 2012 | 2 |
| 19 | Multilingual Datasets | 2017 | 1 |
| 23 | Multilingual Datasets | 2019 | 1 |
| 26 | Multilingual Datasets | 2020 | 3 |
| 29 | Multilingual Datasets | 2021 | 1 |
| 34 | Multilingual Datasets | 2023 | 1 |
| 1 | Public Benchmark Datasets | 2001 | 1 |
| 2 | Public Benchmark Datasets | 2005 | 1 |
| 4 | Public Benchmark Datasets | 2008 | 1 |
| 6 | Public Benchmark Datasets | 2009 | 5 |
| 9 | Public Benchmark Datasets | 2010 | 1 |
| 10 | Public Benchmark Datasets | 2011 | 1 |
| 12 | Public Benchmark Datasets | 2012 | 1 |
| 13 | Public Benchmark Datasets | 2013 | 5 |
| 14 | Public Benchmark Datasets | 2014 | 2 |
| 16 | Public Benchmark Datasets | 2015 | 2 |
| 17 | Public Benchmark Datasets | 2016 | 5 |
| 20 | Public Benchmark Datasets | 2017 | 1 |
| 21 | Public Benchmark Datasets | 2018 | 3 |
| 24 | Public Benchmark Datasets | 2019 | 6 |
| 27 | Public Benchmark Datasets | 2020 | 2 |
| 30 | Public Benchmark Datasets | 2021 | 5 |
| 32 | Public Benchmark Datasets | 2022 | 2 |
| 35 | Public Benchmark Datasets | 2023 | 1 |
| 36 | Public Benchmark Datasets | 2024 | 4 |
# Expand the per-year counts back to one row per paper so the treemap helper
# can re-aggregate by group alone.
expanded_rows <- rep(seq_len(nrow(summary_df)), times = summary_df$n)
unpacked_df <- summary_df[expanded_rows, c("group", "YEAR")]
unpacked_summary_df <- create_summary_df(unpacked_df, "group")
# Treemap of the share of papers per dataset category (shared palette with the
# bubble plot built above).
ggplot_chart <- create_treemap_chart(
  summary_df = unpacked_summary_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggplot_chart
knitr::kable(unpacked_summary_df, caption = "Datasets") %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| Domain-specific Datasets | 15 | 19.7 | 15 (19.7%) | group |
| Multilingual Datasets | 12 | 15.8 | 12 (15.8%) | group |
| Public Benchmark Datasets | 49 | 64.5 | 49 (64.5%) | group |
# 1. Wrap the title
# Treemap of the Distribution of Works on Datasets (Left), and Annual Distribution of These Works (Right)
wrapped_title <- stringr::str_wrap(
  "",
  width = 60
)
title_grob <- grid::textGrob(wrapped_title, gp = grid::gpar(fontsize = 16, fontface = "bold"), hjust = 0.5)
# Helper: pull the legend ("guide-box") grob out of a gtable produced by
# ggplotGrob(). Returns NULL when no legend is present so the checks below can
# stop with a clear message; the previous `grobs[[which(...)]]` form raised a
# cryptic "subscript out of bounds" error before the is.null() guards could run.
extract_guide_box <- function(gtab) {
  grob_names <- vapply(gtab$grobs, function(g) g$name, character(1))
  idx <- which(grob_names == "guide-box")
  if (length(idx) == 0) {
    # ggplot2 >= 3.5 names the legend boxes by position; with
    # legend.position = "bottom" the populated one is "guide-box-bottom".
    idx <- which(grob_names == "guide-box-bottom")
  }
  if (length(idx) == 0) {
    return(NULL)
  }
  gtab$grobs[[idx[1]]]
}
# 2. Extract the FILL legend (from treemap)
treemap_for_fill_legend <- ggplot_chart +
  guides(
    fill = guide_legend(
      ncol = 8,
      byrow = TRUE,
      keywidth = unit(0.6, "cm"),
      keyheight = unit(0.6, "cm"),
      title = "Group",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 8),
    legend.spacing.x = unit(0.15, "cm"),
    legend.spacing.y = unit(0.1, "cm"),
    legend.background = element_rect(fill="transparent", colour = NA), # Ensure no box around fill legend either
    legend.box.background = element_rect(fill="transparent", colour = NA),
    legend.box.margin = margin(t = 5, r = 5, b = 5, l = 0) # Margin for fill legend
  )
g_fill <- ggplotGrob(treemap_for_fill_legend)
fill_legend_grob <- extract_guide_box(g_fill)
# 3. Extract the SIZE legend (from bubble plot, horizontal, no box)
bubble_plot_for_size_legend <- bubble_plot +
  guides(
    fill = "none",
    size = guide_legend(
      title = "Count",
      direction = "horizontal",
      title.position = "top",
      label.position = "bottom",
      keywidth = unit(1.2, "cm"),
      keyheight = unit(0.8, "cm"),
      label.hjust = 0.5,
      title.hjust = 0.5,
      override.aes = list(fill = "grey70", color = NA, alpha = 0.8, shape = 21) # color = NA for no border on keys
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 10, face = "plain"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(fill="transparent", colour = NA), # No fill, no border for overall legend
    legend.box.background = element_rect(fill="transparent", colour = NA), # No fill, no border for the guide box
    legend.key = element_rect(fill="transparent", colour = NA), # No fill/border for individual key backgrounds
    legend.spacing.x = unit(0.1, "cm"), # Tighter spacing for horizontal legend
    legend.box.margin = margin(t = 5, r = 0, b = 5, l = 5) # Margin for size legend
  )
g_size <- ggplotGrob(bubble_plot_for_size_legend)
size_legend_grob <- extract_guide_box(g_size)
# Check if legends were found (extract_guide_box() returns NULL on failure)
if(is.null(fill_legend_grob)) stop("Fill legend not found!")
if(is.null(size_legend_grob)) stop("Size legend not found!")
# 4. Combine the two extracted legends HORIZONTALLY
# Give fill legend more space.
combined_legends_grob <- arrangeGrob(
  fill_legend_grob,
  size_legend_grob,
  ncol = 2,
  widths = unit(c(0.65, 0.35), "npc") # Fill legend gets 65%, Size legend gets 35% of the horizontal space for legends
  # Adjust these proportions as needed.
)
# 5. Remove legends from the original plots for the main panel
ggplot_chart_no_legend <- ggplot_chart + theme(legend.position = "none")
bubble_plot_no_legend <- bubble_plot +
  theme(legend.position = "none",
        plot.title = element_blank(),
        plot.subtitle = element_blank())
# 6. Arrange the plots in a 2-column layout
plots_row <- arrangeGrob(
  ggplot_chart_no_legend,
  bubble_plot_no_legend,
  ncol = 2
)
# 7. Create the combined plot
# The height of the legend panel will be determined by the taller of the two (multi-row fill legend)
title_height_est <- grobHeight(title_grob) + unit(0.2, "inches")
# Calculate height of the combined horizontal legend panel
# It will be the height of the fill legend (which is multi-row)
legend_panel_actual_height <- grobHeight(fill_legend_grob)
final_legend_panel_height <- legend_panel_actual_height + unit(0.3, "inches") # Add padding
combined_plot_final <- grid.arrange(
  title_grob,
  plots_row,
  combined_legends_grob,
  ncol = 1,
  heights = unit.c(title_height_est,
                   unit(1, "null"),
                   final_legend_panel_height)
)
ggsave(
  "fig-mv-datasets-pie-bubble.pdf",
  plot = combined_plot_final,
  width = 14,
  height = 8
)
# Registry for per-column treemap charts built in this and following sections.
GG_PLOTS_REGISTER <- list()
# Rename the columns to make them easier to work with
df_renamed <- multiview_df %>%
  rename(
    datasets = 'Identified the datasets used in the article',
    dis_datasets = 'Disambiguated datasets names',
    models = 'What other models were selected for comparison?',
    dis_models = 'Disambiguated models names',
    metrics = 'Identified performance metrics used in the article',
    dis_metrics = 'Disambiguated performance metrics names'
  )
# Create better titles (kable captions) for each disambiguated column
title_mapping <- c(
  "dis_datasets" = "Disambiguated datasets names",
  "dis_models" = "Disambiguated models names",
  "dis_metrics" = "Disambiguated performance metrics names"
)
# Clean and standardize responses - one dataset name per row (the source cell
# holds one name per line), lower-cased, trimmed, empty strings recoded to NA
dis_df <- df_renamed %>%
  select('l.p', 'dis_datasets') %>%
  separate_rows('dis_datasets', sep = "\\r?\\n") %>%
  mutate(
    dis_datasets = stringr::str_to_lower(dis_datasets),
    dis_datasets = stringr::str_trim(dis_datasets),
    dis_datasets = na_if(dis_datasets, "")
  )
# Full frequency table, most frequent first, missing names dropped
summary_df <- create_summary_df(dis_df, 'dis_datasets') %>%
  arrange(desc(count)) %>% filter(!is.na(group))
summary_cuted_df <- summary_df %>%
  # 1) lump any low-frequency group (count < 5) into "other"
  mutate(group = if_else(count < 5, "other", group)) %>%
  # 2) re-aggregate by the (possibly new) group
  group_by(group, column) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  # 3) recompute percentages and labels over the new totals
  mutate(percentage = count / sum(count) * 100,
         label = paste0(count, " \n (", round(percentage, 1), "%)")) %>%
  # 4) order by descending count
  arrange(desc(count))
# One named colour per group. The ramp length was previously hard-coded to 7,
# which makes setNames() error whenever lumping leaves more than 7 groups and
# leaves unnamed trailing colours otherwise. Size it from the data instead;
# for up to 7 groups the assigned colours are identical to before.
n_groups <- length(unique(summary_cuted_df$group))
palette_colours <- grDevices::colorRampPalette(ggsci::pal_jco("default")(7))(max(n_groups, 7L))
global_palette <- stats::setNames(palette_colours[seq_len(n_groups)], unique(summary_cuted_df$group))
ggplot_chart <- create_treemap_chart(
  summary_df = summary_cuted_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggsave(
  "fig-mv-eval-datasets-pie.pdf",
  plot = ggplot_chart,
  width = 14,
  height = 8
)
GG_PLOTS_REGISTER[['dis_datasets']] <- ggplot_chart
ggplot_chart
# The kable shows the full (un-lumped) table; the treemap uses the lumped one
knitr::kable(summary_df, caption = title_mapping['dis_datasets']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| reuters corpora | 29 | 11.9 | 29 (11.9%) | dis_datasets |
| 20 newsgroups | 12 | 4.9 | 12 (4.9%) | dis_datasets |
| webkb | 8 | 3.3 | 8 (3.3%) | dis_datasets |
| imdb movie reviews | 6 | 2.5 | 6 (2.5%) | dis_datasets |
| yelp reviews | 5 | 2.1 | 5 (2.1%) | dis_datasets |
| ag news | 4 | 1.6 | 4 (1.6%) | dis_datasets |
| amazon reviews | 4 | 1.6 | 4 (1.6%) | dis_datasets |
| cora | 4 | 1.6 | 4 (1.6%) | dis_datasets |
| mr | 4 | 1.6 | 4 (1.6%) | dis_datasets |
| twitter data | 4 | 1.6 | 4 (1.6%) | dis_datasets |
| yale faces | 4 | 1.6 | 4 (1.6%) | dis_datasets |
| bbc news | 3 | 1.2 | 3 (1.2%) | dis_datasets |
| corel image collections | 3 | 1.2 | 3 (1.2%) | dis_datasets |
| nus-wide dataset family | 3 | 1.2 | 3 (1.2%) | dis_datasets |
| ohsumed corpus | 3 | 1.2 | 3 (1.2%) | dis_datasets |
| 3sources | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| github | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| holidays image dataset | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| inventorum | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| mnist | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| sogou news/text | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| thucnews | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| trec question/text classification | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| web of science | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| wikipedia data | 2 | 0.8 | 2 (0.8%) | dis_datasets |
| aapd (arxiv academic paper dataset) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| abusive ugc (abuse) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| acl | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| ads data set | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| adverse drug reactions (adr) dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| aep | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| air | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| aloi | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| animal | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| arem | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| australian | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| bioid | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| biometrics data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| breast cancer | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| bs-top4 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| caltech101-20 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| citeseer data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| clinical interview fragments (motivational interviewing-based weight loss sessions) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| collab | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| comparable multilingual corpus | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| cub | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| customer review (cr) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| da-vincis challenge data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| dd | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| deezer | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| digits data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| douban movie short comments (dmsc) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| east money | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| elec | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| emotion classification (ec) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| emotion text (twt-13) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| enzymes | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| espgame | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| eurlex-4k | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| europarl v7 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
|  | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| fakenews | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| fudan | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| gas | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| haodf online dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| hate speech (hate-3) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| health-related forum dataset (approximately 2 million sentences) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| iaprtc12 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| iclr | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| iflytek | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| image recognition dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| inews | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| insider threat detection (itd) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| internet advertisements (ad) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| internet movie script database (imsdb) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| ionosphere | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| kaggle | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| lnai dataset (lecture notes in artificial intelligence dataset). | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| lobbying disclosure act (lda) dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| lpa | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| mag-cs | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| mastercook recipes dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| mcintire | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| medline citations | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| mexican twitter corpus (mex-a3t-500) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| mirflickr | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| movie review (mr) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| mscoco dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| multidom | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| multilingual ted corpus | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| multilingual text (mtext) data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| multimedia web image-text database | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| narrativeqa | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| nasdaq | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| nci1 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| newswire articles written in 5 languages (english, french, german, italian, spanish) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| nytimes (nyt) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| object detection dataset (pascal voc 2007) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| observation dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| office-caltech | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| orl | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| out-scene | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| page-blocks | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| pascal07 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| people | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| person re-identification dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| pima | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| pm | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| pornographic ugc (porn) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| product consumption dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| product review data set | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| proteins | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| pubmed | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| rec | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| reddit data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| rumour detection dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| same-3 dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| scy-cluster | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| scy-genes | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| se-absa15 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| search snippets dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| semeval | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| smart video corpus (svc) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| snippets | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| spambase | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| stanford sentiment treebank (sst) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| story2personality | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| subjectivity dataset (subj) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| synthetic data set | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| synthetic datasets | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| tagmynews | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| the article does not explicitly mention the names of any specific datasets used. | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| the personality database (pdb) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| tiger corpus | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| topic | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| toutiao | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| tweet | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| uid | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| umist | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| w-1000 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| w-200 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| w-2101 | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| waveform | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| web advertisement images dataset | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| web of science (wos) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| welfake | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| wiki-30k | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| wikics | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| wikiner corpus | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| x-ray microbeam speech data (xrmb) | 1 | 0.4 | 1 (0.4%) | dis_datasets |
| yeast data | 1 | 0.4 | 1 (0.4%) | dis_datasets |
# Clean and standardize responses - convert everything to lower case and handle variations
dis_df <- df_renamed %>%
  select('l.p', 'dis_models') %>%
  # One model mention per row: answers are newline-separated free text.
  separate_rows('dis_models', sep = "\\r?\\n") %>%
  mutate(
    dis_models = stringr::str_to_lower(dis_models),
    dis_models = stringr::str_trim(dis_models),
    dis_models = na_if(dis_models, "")
  )
summary_df <- create_summary_df(dis_df, 'dis_models') %>%
  arrange(desc(count)) %>% filter(!is.na(group))
summary_cuted_df <- summary_df %>%
  # 1) lump any low-frequency group into "other"
  mutate(group = if_else(count < 20, "other", group)) %>%
  # 2) re-aggregate by the (possibly new) group
  group_by(group, column) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  # 3) recompute percentages and labels over the new totals
  mutate(percentage = count / sum(count) * 100,
         label = paste0(count, " (", round(percentage, 1), "%)")) %>%
  # 4) order by descending count
  arrange(desc(count))
# Interpolate the JCO palette to exactly one colour per lumped group; a
# hard-coded size of 7 mismatched the actual group count after lumping,
# and `names<-` pads short name vectors with NA.
model_groups <- unique(summary_cuted_df$group)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(ggsci::pal_jco("default")(7))(length(model_groups)),
  model_groups
)
ggplot_chart <- create_treemap_chart(
  summary_df = summary_cuted_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggsave(
  "fig-mv-eval-models-pie.pdf",
  plot = ggplot_chart,
  width = 14,
  height = 8
)
# Register the treemap for the combined figure assembled later.
GG_PLOTS_REGISTER[['dis_models']] <- ggplot_chart
ggplot_chart
# Full (pre-lumping) summary table for the manuscript.
knitr::kable(summary_df, caption = title_mapping['dis_models']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| multi-modal/multi-view specific architectures & techniques | 85 | 23.2 | 85 (23.2%) | dis_models |
| support vector machine (svm) | 50 | 13.6 | 50 (13.6%) | dis_models |
| traditional/statistical machine learning & other methods | 35 | 9.5 | 35 (9.5%) | dis_models |
| graph neural networks (gnn) | 24 | 6.5 | 24 (6.5%) | dis_models |
| word embeddings and text representation | 18 | 4.9 | 18 (4.9%) | dis_models |
| bert (bidirectional encoder representations from transformers) | 15 | 4.1 | 15 (4.1%) | dis_models |
| convolutional neural networks (cnn) | 14 | 3.8 | 14 (3.8%) | dis_models |
| boosting methods | 13 | 3.5 | 13 (3.5%) | dis_models |
| long short-term memory (lstm) | 13 | 3.5 | 13 (3.5%) | dis_models |
| naive bayes | 13 | 3.5 | 13 (3.5%) | dis_models |
| correlation analysis (cca) family | 10 | 2.7 | 10 (2.7%) | dis_models |
| k-nearest neighbors (knn) | 9 | 2.5 | 9 (2.5%) | dis_models |
| self-attention / attention | 8 | 2.2 | 8 (2.2%) | dis_models |
| decision tree | 5 | 1.4 | 5 (1.4%) | dis_models |
| logistic regression | 5 | 1.4 | 5 (1.4%) | dis_models |
| gated recurrent unit (gru) | 4 | 1.1 | 4 (1.1%) | dis_models |
| graph attention network (gat) | 4 | 1.1 | 4 (1.1%) | dis_models |
| graph convolutional network (gcn) | 4 | 1.1 | 4 (1.1%) | dis_models |
| neural network | 4 | 1.1 | 4 (1.1%) | dis_models |
| recurrent neural network (rnn) | 4 | 1.1 | 4 (1.1%) | dis_models |
| autoencoders | 2 | 0.5 | 2 (0.5%) | dis_models |
| capsule network | 2 | 0.5 | 2 (0.5%) | dis_models |
| random forest | 2 | 0.5 | 2 (0.5%) | dis_models |
| traditional/statistical machine learning & other methods | 2 | 0.5 | 2 (0.5%) | dis_models |
| transformer-based architectures | 2 | 0.5 | 2 (0.5%) | dis_models |
| adbert | 1 | 0.3 | 1 (0.3%) | dis_models |
| alexnet | 1 | 0.3 | 1 (0.3%) | dis_models |
| arima | 1 | 0.3 | 1 (0.3%) | dis_models |
| bayesian network (bayes) | 1 | 0.3 | 1 (0.3%) | dis_models |
| fasttext | 1 | 0.3 | 1 (0.3%) | dis_models |
| latent dirichlet allocation (lda) | 1 | 0.3 | 1 (0.3%) | dis_models |
| latent semantic analysis (lsa) | 1 | 0.3 | 1 (0.3%) | dis_models |
| logistic regression (lg) | 1 | 0.3 | 1 (0.3%) | dis_models |
| long short term memory (lstm) | 1 | 0.3 | 1 (0.3%) | dis_models |
| roberta (robustly optimized bert pretraining approach) | 1 | 0.3 | 1 (0.3%) | dis_models |
| support vector machines (svm) | 1 | 0.3 | 1 (0.3%) | dis_models |
| textrcnn | 1 | 0.3 | 1 (0.3%) | dis_models |
| xgboost | 1 | 0.3 | 1 (0.3%) | dis_models |
# Clean and standardize responses - convert everything to lower case and handle variations
dis_df <- df_renamed %>%
  select('l.p', 'dis_metrics') %>%
  # One metric mention per row: answers are newline-separated free text.
  separate_rows('dis_metrics', sep = "\\r?\\n") %>%
  mutate(
    dis_metrics = stringr::str_to_lower(dis_metrics),
    dis_metrics = stringr::str_trim(dis_metrics),
    dis_metrics = na_if(dis_metrics, "")
  )
summary_df <- create_summary_df(dis_df, 'dis_metrics') %>%
  arrange(desc(count)) %>% filter(!is.na(group))
summary_cuted_df <- summary_df %>%
  # 1) lump any low-frequency group into "other"
  mutate(group = if_else(count < 11, "other", group)) %>%
  # 2) re-aggregate by the (possibly new) group
  group_by(group, column) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  # 3) recompute percentages and labels over the new totals
  mutate(percentage = count / sum(count) * 100,
         label = paste0(count, " (", round(percentage, 1), "%)")) %>%
  # 4) order by descending count
  arrange(desc(count))
# Interpolate the JCO palette to exactly one colour per lumped group; a
# hard-coded size of 7 mismatched the actual group count after lumping,
# and `names<-` pads short name vectors with NA.
metric_groups <- unique(summary_cuted_df$group)
global_palette <- stats::setNames(
  grDevices::colorRampPalette(ggsci::pal_jco("default")(7))(length(metric_groups)),
  metric_groups
)
ggplot_chart <- create_treemap_chart(
  summary_df = summary_cuted_df,
  title = "",
  subtitle = "",
  global_palette = global_palette,
  text_size = 11 # Adjust as needed
)
ggsave(
  "fig-mv-eval-per-met-pie.pdf",
  plot = ggplot_chart,
  width = 14,
  height = 8
)
# Register the treemap for the combined figure assembled later.
GG_PLOTS_REGISTER[['dis_metrics']] <- ggplot_chart
ggplot_chart
# Full (pre-lumping) summary table for the manuscript.
knitr::kable(summary_df, caption = title_mapping['dis_metrics']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| accuracy | 49 | 22.2 | 49 (22.2%) | dis_metrics |
| f1-score | 34 | 15.4 | 34 (15.4%) | dis_metrics |
| precision | 20 | 9.0 | 20 (9%) | dis_metrics |
| recall | 17 | 7.7 | 17 (7.7%) | dis_metrics |
| area under the roc curve (auc/auroc) | 9 | 4.1 | 9 (4.1%) | dis_metrics |
| classification error rate | 7 | 3.2 | 7 (3.2%) | dis_metrics |
| other specific metrics | 7 | 3.2 | 7 (3.2%) | dis_metrics |
| hierarchical classification metrics | 6 | 2.7 | 6 (2.7%) | dis_metrics |
| evaluation aspects/concepts | 5 | 2.3 | 5 (2.3%) | dis_metrics |
| macro f1-score | 5 | 2.3 | 5 (2.3%) | dis_metrics |
| mean average precision (map) | 5 | 2.3 | 5 (2.3%) | dis_metrics |
| statistical tests | 5 | 2.3 | 5 (2.3%) | dis_metrics |
| micro f1-score | 4 | 1.8 | 4 (1.8%) | dis_metrics |
| variability and confidence metrics | 4 | 1.8 | 4 (1.8%) | dis_metrics |
| computational and resource metrics | 3 | 1.4 | 3 (1.4%) | dis_metrics |
| correlation coefficients | 3 | 1.4 | 3 (1.4%) | dis_metrics |
| distance and similarity metrics | 3 | 1.4 | 3 (1.4%) | dis_metrics |
| average precision (ap) | 2 | 0.9 | 2 (0.9%) | dis_metrics |
| hamming loss | 2 | 0.9 | 2 (0.9%) | dis_metrics |
| kappa statistics | 2 | 0.9 | 2 (0.9%) | dis_metrics |
| confusion matrix | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| convergence rate | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| cross-entropy loss | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| error | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| error rate | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| generalization capability | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| log-likelihood | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| loss (general) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| macro-averaging | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| macro-f1 score | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean absolute percentage error (mape) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean error | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean reciprocal rank (mrr) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| mean square error (mse) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| micro-averaging | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| nlp-specific metrics | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| normalized discounted cumulative gain (ndcg) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| pixel error | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| precision at k (p@k) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| recall at k (r@k) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| recall rate | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| recall | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| root mean square error (rmse) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| stability | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| test performance | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| unweighted average recall (uar) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
| word error rate (wer) | 1 | 0.5 | 1 (0.5%) | dis_metrics |
# Treemap charts of the distribution of datasets, models and performance metrixes used in multiview works
# The title string is intentionally empty here (the caption is supplied by
# the surrounding document); str_wrap() would fold a long title at 60 chars.
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
# Render the title as a grid grob so it can be placed above the plots.
title_grob <- grid::textGrob(wrapped_title,
gp = grid::gpar(fontsize = 14, fontface = "plain"),
just = "center")
# Layout matrix: row 1 = title (grob 1) spanning both columns, row 2 =
# datasets (2) and metrics (3) side by side, row 3 = models (4) full width.
layout <- rbind(
c(1, 1),
c(2, 3),
c(4, 4)
)
# Assemble the three treemaps registered by the preceding chunks.
combined_plot <- gridExtra::grid.arrange(
title_grob,
GG_PLOTS_REGISTER[['dis_datasets']], GG_PLOTS_REGISTER[['dis_metrics']], GG_PLOTS_REGISTER[['dis_models']],
layout_matrix = layout,
heights = c(1, 10, 10) # tweak these ratios to taste
)
# Save the combined figure to disk and also draw it in the rendered report.
ggsave(
"fig-mv-datasets-models-met-pie.pdf",
plot = combined_plot,
width = 14,
height = 8
)
plot(combined_plot)
# Reset the plot registry before collecting the evaluation/replication pies.
GG_PLOTS_REGISTER <- list()
# Rename the columns to make them easier to work with
df_renamed <- multiview_df %>%
rename(
test_set_used = 'Was a test set or cross-validation procedure used to determine the performance metrics values? Please write only yes, or no.',
statistical_tests = 'Were statistical tests used? Please write only yes, or no.',
other_analysis_methods = 'Did other methods of analyzing the outcomes, for example Bayesian, explainable machine learning methods, be used? Please write yes, or no.',
ablation_studies = 'Were ablation studies and/or comparisons with unimodal classifiers performed? Please write yes, or no.',
replication_info = 'Did the authors provide enough information (data, code) to allow for replication of the study? Please write only yes, or no.'
)
# Clean and standardize responses - convert everything to lower case and handle variations
# Any answer that is not recognisably "yes"/"y" or "no"/"n" (including NA)
# is coerced to "No"; the explicit is.na() branch is redundant with the
# TRUE fallback but kept for readability of the decision rule.
df_clean <- df_renamed %>%
mutate(across(
c(
test_set_used,
statistical_tests,
other_analysis_methods,
ablation_studies,
replication_info
),
~ case_when(
tolower(.) %in% c("yes", "y") ~ "Yes",
tolower(.) %in% c("no", "n") ~ "No",
is.na(.) ~ "No",
TRUE ~ "No"
)
))
# Create better titles for each column
# Used both as plot titles and as kable captions in the chunks below.
title_mapping <- c(
"test_set_used" = "Test Set or Cross-Validation Used",
"statistical_tests" = "Statistical Tests Used",
"other_analysis_methods" = "Alternative Analysis Methods Used",
"ablation_studies" = "Ablation Studies Performed",
"replication_info" = "Replication Information Provided"
)
# Pie chart: share of studies that used a test set / cross-validation.
summary_df <- create_summary_df(df_clean, 'test_set_used')
# Two-colour (Yes/No) palette named after the groups present in the data.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2),
unique(summary_df$group))
ggplot_chart <- create_pie_chart(summary_df, title_mapping['test_set_used'], global_palette = global_palette)
ggsave(
"fig-mv-eval-test-set-pie.pdf",
plot = ggplot_chart,
width = 14,
height = 8
)
# Register the chart for the combined evaluation figure below.
GG_PLOTS_REGISTER[['test_set_used']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['test_set_used']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 1 | 1.4 | 1 (1.4%) | test_set_used |
| Yes | 72 | 98.6 | 72 (98.6%) | test_set_used |
# Pie chart: share of studies that applied statistical tests.
summary_df <- create_summary_df(df_clean, 'statistical_tests')
# Two-colour (Yes/No) palette named after the groups present in the data.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2),
unique(summary_df$group))
ggplot_chart <- create_pie_chart(summary_df, title_mapping['statistical_tests'], global_palette = global_palette)
ggsave("fig-mv-eval-stat-test-pie.pdf", plot = ggplot_chart, width = 14, height = 8)
# Register the chart for the combined evaluation figure below.
GG_PLOTS_REGISTER[['statistical_tests']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['statistical_tests']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 56 | 76.7 | 56 (76.7%) | statistical_tests |
| Yes | 17 | 23.3 | 17 (23.3%) | statistical_tests |
# Pie chart: share of studies using alternative analysis methods
# (e.g. Bayesian or explainable-ML analyses of the outcomes).
summary_df <- create_summary_df(df_clean, 'other_analysis_methods')
# Two-colour (Yes/No) palette named after the groups present in the data.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2),
unique(summary_df$group))
ggplot_chart <- create_pie_chart(summary_df, title_mapping['other_analysis_methods'], global_palette = global_palette)
ggsave(
"fig-mv-eval-other-test-pie.pdf",
plot = ggplot_chart,
width = 14,
height = 8
)
# Register the chart for the combined evaluation figure below.
GG_PLOTS_REGISTER[['other_analysis_methods']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['other_analysis_methods']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 73 | 100 | 73 (100%) | other_analysis_methods |
# Pie chart: share of studies with ablation studies / unimodal baselines.
summary_df <- create_summary_df(df_clean, 'ablation_studies')
# Two-colour (Yes/No) palette named after the groups present in the data.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2),
unique(summary_df$group))
ggplot_chart <- create_pie_chart(summary_df, title_mapping['ablation_studies'], global_palette = global_palette)
ggsave(
"fig-mv-eval-ablation-study-pie.pdf",
plot = ggplot_chart,
width = 14,
height = 8
)
# Register the chart for the combined evaluation figure below.
GG_PLOTS_REGISTER[['ablation_studies']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['ablation_studies']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 14 | 19.2 | 14 (19.2%) | ablation_studies |
| Yes | 59 | 80.8 | 59 (80.8%) | ablation_studies |
# Pie chart: share of studies providing data/code for replication.
summary_df <- create_summary_df(df_clean, 'replication_info')
# Two-colour (Yes/No) palette named after the groups present in the data.
global_palette <- stats::setNames(grDevices::colorRampPalette(ggsci::pal_jco("default")(2))(2),
unique(summary_df$group))
ggplot_chart <- create_pie_chart(summary_df, title_mapping['replication_info'], global_palette = global_palette)
ggsave(
"fig-mv-eval-replication-pie.pdf",
plot = ggplot_chart,
width = 14,
height = 8
)
# Register the chart for the combined evaluation figure below.
GG_PLOTS_REGISTER[['replication_info']] <- ggplot_chart
ggplot_chart
knitr::kable(summary_df, caption = title_mapping['replication_info']) %>% kableExtra::kable_styling()
| group | count | percentage | label | column |
|---|---|---|---|---|
| No | 61 | 83.6 | 61 (83.6%) | replication_info |
| Yes | 12 | 16.4 | 12 (16.4%) | replication_info |
# Pie charts of studies evaluation & replication
# The title string is intentionally empty (the caption is supplied by the
# surrounding document); str_wrap() would fold a long title at 60 chars.
wrapped_title <- stringr::str_wrap(
"",
width = 60
)
# Render the title as a grid grob so it can sit above the pie grid.
title_grob <- grid::textGrob(wrapped_title,
gp = grid::gpar(fontsize = 14, fontface = "plain"),
just = "center")
# Staggered 3x3 layout; NA cells stay empty.
layout <- rbind(
c(1, NA, 2), # Top row: plot 1, empty, plot 2
c(NA, 3, NA), # Middle row: empty, plot 3, empty
c(4, NA, 5) # Bottom row: plot 4, empty, plot 5
)
# First arrange the five pies registered by the preceding chunks...
combined_plot <- gridExtra::grid.arrange(
GG_PLOTS_REGISTER[['test_set_used']], # position 1: top-left
GG_PLOTS_REGISTER[['statistical_tests']], # position 2: top-right
GG_PLOTS_REGISTER[['other_analysis_methods']], # position 3: middle-center
GG_PLOTS_REGISTER[['ablation_studies']], # position 4: bottom-left
GG_PLOTS_REGISTER[['replication_info']], # position 5: bottom-right
layout_matrix = layout
)
# ...then stack the (currently empty) title on top of the pie grid.
combined_plot <- gridExtra::grid.arrange(
title_grob,
combined_plot,
ncol = 1,
heights = c(0.5, 10) # Title takes less space than the plots
)
# Save the combined figure and also draw it in the rendered report.
ggsave(
"fig-mv-eval-test-set-tests-abl-rep-pie.pdf",
plot = combined_plot,
width = 14,
height = 8
)
plot(combined_plot)
# Helper function to format authors
# Format a BibTeX author field as "F. M. Last, F. Last, ...".
#
# Accepts a character scalar/vector or a list wrapping one (as produced by
# bib2df); only the first element is used. Authors are split on " and ".
# Both "Last, First Middle" and "First Middle Last" forms are supported;
# given names are reduced to dotted initials. Returns "" for NULL, empty,
# NA or blank input.
format_authors <- function(authors_field) {
  if (is.null(authors_field) || length(authors_field) == 0) return("")
  # bib2df stores AUTHOR as a list column; unwrap the first element.
  if (is.list(authors_field)) {
    if (length(authors_field[[1]]) == 0) return("")
    authors_str <- authors_field[[1]]
  } else {
    authors_str <- authors_field
  }
  # Only the first value of a multi-element vector is formatted.
  authors_str <- authors_str[1]
  if (length(authors_str) == 0 || is.na(authors_str) || authors_str == "") return("")
  authors_str <- as.character(authors_str)
  # Turn given-name parts into "X. Y." style initials (empty parts dropped).
  initials_of <- function(name_parts) {
    name_parts <- trimws(name_parts)
    name_parts <- name_parts[nchar(name_parts) > 0]
    if (length(name_parts) == 0) return("")
    paste(paste0(substr(name_parts, 1, 1), "."), collapse = " ")
  }
  authors <- strsplit(authors_str, " and ", fixed = TRUE)[[1]]
  # vapply (not sapply) guarantees a character result even for edge inputs.
  formatted_authors <- vapply(authors, function(author) {
    author <- trimws(author)
    if (grepl(",", author, fixed = TRUE)) {
      # "Last, First [Middle...]" form.
      parts <- strsplit(author, ",", fixed = TRUE)[[1]]
      last_name <- trimws(parts[1])
      # Guard: "Doe," with no given name made parts[2] NA in the original
      # code and crashed downstream; fall back to the bare last name.
      if (length(parts) < 2 || is.na(parts[2])) return(last_name)
      initials <- initials_of(strsplit(trimws(parts[2]), " ", fixed = TRUE)[[1]])
      paste0(initials, " ", last_name)
    } else {
      # "First [Middle...] Last" form; single-token names pass through.
      parts <- strsplit(author, " ", fixed = TRUE)[[1]]
      if (length(parts) >= 2) {
        initials <- initials_of(parts[seq_len(length(parts) - 1)])
        paste0(initials, " ", parts[length(parts)])
      } else {
        author # Return as-is if it cannot be parsed
      }
    }
  }, character(1))
  paste(formatted_authors, collapse = ", ")
}
# Pull the first scalar value out of a bibliography field, tolerating NULL,
# empty, list-wrapped and NA inputs. Always returns a character scalar
# ("" when nothing usable is present).
safe_extract <- function(field) {
  if (is.null(field)) {
    return("")
  }
  # bib2df columns may be list-wrapped; unwrap before taking the first value.
  candidate <- if (is.list(field)) {
    if (length(field) == 0 || length(field[[1]]) == 0) {
      return("")
    }
    field[[1]][1]
  } else {
    if (length(field) == 0) {
      return("")
    }
    field[1]
  }
  if (is.na(candidate)) {
    return("")
  }
  as.character(candidate)
}
# Format journal article
# Build an Elsevier-style journal reference string. `entry` is one row of a
# bib2df data frame; `doi` is the already-prefixed DOI URL or "".
format_article <- function(authors, title, year, entry, doi) {
  journal <- safe_extract(entry$JOURNAL)
  volume <- safe_extract(entry$VOLUME)
  pages <- safe_extract(entry$PAGES)
  # Check if it's an article number format (e.g. e00205)
  is_article_number <- pages != "" && grepl("^e[0-9]+", pages)
  if (is_article_number) {
    # Format with article number (kept verbatim, no dash rewriting)
    ref <- paste0(authors, ", ", year, ". ", title, ". ", journal, ". ", volume, ", ", pages, ".")
  } else {
    # Standard format
    volume_info <- if (volume != "") paste0(" ", volume) else ""
    # Collapse any RUN of hyphens into a single en dash: BibTeX page ranges
    # use "--", and the previous per-character gsub("-") rendered them as
    # "847 – – 869" in the output.
    page_info <- if (pages != "") paste0(" ", gsub("-+", " – ", pages)) else ""
    ref <- paste0(authors, ", ", title, ", ", journal, volume_info, " (", year, ")", page_info, ".")
  }
  if (doi != "") ref <- paste0(ref, " ", doi, ".")
  return(ref)
}
# Format book
# Build a book reference: authors, title, optional edition, publisher,
# optional address, year. `entry` is one row of a bib2df data frame.
format_book <- function(authors, title, year, entry) {
  publisher <- safe_extract(entry$PUBLISHER)
  address <- safe_extract(entry$ADDRESS)
  edition <- safe_extract(entry$EDITION)
  # Optional fragments collapse to "" when the field is absent.
  edition_text <- ""
  if (edition != "") {
    edition_text <- paste0(edition, " ed., ")
  }
  location <- ""
  if (address != "") {
    location <- paste0(", ", address)
  }
  paste0(authors, ", ", title, ", ", edition_text, publisher, location, ", ", year, ".")
}
# Format book chapter
# Build an @incollection/@inbook reference with editors, book title,
# publisher, optional address and page range.
format_chapter <- function(authors, title, year, entry) {
  booktitle <- safe_extract(entry$BOOKTITLE)
  editor <- format_authors(entry$EDITOR)
  publisher <- safe_extract(entry$PUBLISHER)
  address <- safe_extract(entry$ADDRESS)
  pages <- safe_extract(entry$PAGES)
  editor_text <- if (editor != "") paste0("in: ", editor, " (Eds.), ") else ""
  location <- if (address != "") paste0(", ", address) else ""
  # Collapse any RUN of hyphens: BibTeX page ranges use "--", and the
  # previous per-character gsub("-") rendered them as "pp. 1 - - 13".
  page_info <- if (pages != "") paste0(", pp. ", gsub("-+", " - ", pages)) else ""
  ref <- paste0(authors, ", ", title, ", ", editor_text, booktitle, ", ",
                publisher, location, ", ", year, page_info, ".")
  return(ref)
}
# Format miscellaneous (websites, datasets)
# Build an @misc reference. Entries mentioning "data"/"dataset" anywhere in
# title/howpublished/note are formatted as datasets; everything else is
# treated as a web resource with its URL and optional access note.
format_misc <- function(authors, title, year, entry, url, doi) {
  howpublished <- safe_extract(entry$HOWPUBLISHED)
  note <- safe_extract(entry$NOTE)
  # Heuristic dataset detection over the concatenated descriptive fields.
  descriptive_text <- paste(title, howpublished, note)
  if (grepl("dataset|data", descriptive_text, ignore.case = TRUE)) {
    ref <- paste0(authors, ", ", title, " [dataset], ", howpublished, ", ", year, ".")
    if (doi != "") {
      ref <- paste0(ref, " ", doi, ".")
    }
  } else {
    # Website format: append the access note in parentheses when present.
    access_info <- ""
    if (note != "") {
      access_info <- paste0(" (", note, ")")
    }
    ref <- paste0(authors, ", ", title, ". ", url, ", ", year, access_info, ".")
  }
  ref
}
# Format software
# Build a software reference: authors, title, optional version, a
# "[software]" marker, distribution channel, optional note and year.
format_software <- function(authors, title, year, entry, doi) {
  howpublished <- safe_extract(entry$HOWPUBLISHED)
  version <- safe_extract(entry$VERSION)
  note <- safe_extract(entry$NOTE)
  # Optional fragments collapse to "" when the field is absent.
  version_text <- ""
  if (version != "") {
    version_text <- paste0(" ", version)
  }
  date_info <- ""
  if (note != "") {
    date_info <- paste0(", ", note)
  }
  ref <- paste0(authors, ", ", title, version_text, " [software], ",
                howpublished, date_info, ", ", year, ".")
  if (doi != "") {
    ref <- paste0(ref, " ", doi, ".")
  }
  ref
}
# Default format
# Fallback reference used for entry types without a dedicated formatter.
# `entry` is accepted only for signature parity with the other formatters.
# URL (if any) is appended before the DOI (if any).
format_default <- function(authors, title, year, entry, doi, url) {
  ref <- paste0(authors, ", ", title, " (", year, ").")
  if (url != "") {
    ref <- paste0(ref, " ", url, ".")
  }
  if (doi != "") {
    ref <- paste0(ref, " ", doi, ".")
  }
  ref
}
# Format conference proceedings paper
# Build an @inproceedings reference with conference name (BOOKTITLE),
# optional publisher/address and page range.
format_proceedings <- function(authors, title, year, entry, doi) {
  booktitle <- safe_extract(entry$BOOKTITLE)
  pages <- safe_extract(entry$PAGES)
  publisher <- safe_extract(entry$PUBLISHER)
  address <- safe_extract(entry$ADDRESS)
  location <- if (address != "") paste0(", ", address) else ""
  # Collapse any RUN of hyphens into a single en dash: BibTeX page ranges
  # use "--", and the previous per-character gsub("-") rendered them as
  # "pp. 1 – – 7" in the output.
  page_info <- if (pages != "") paste0(", pp. ", gsub("-+", " – ", pages)) else ""
  publisher_info <- if (publisher != "") paste0(", ", publisher) else ""
  ref <- paste0(authors, ", ", title, ", in: ", booktitle, publisher_info,
                location, ", ", year, page_info, ".")
  if (doi != "") ref <- paste0(ref, " ", doi, ".")
  return(ref)
}
# Render every entry of a bib2df data frame as a numbered plain-text
# reference, dispatching on the BibTeX entry type (CATEGORY column).
# Returns one string with entries separated by blank lines, or a short
# message when the data frame is NULL/empty.
format_bibliography <- function(bib_df) {
  if (is.null(bib_df) || nrow(bib_df) == 0) {
    return("No bibliography entries found.")
  }
  # Preallocate the result vector; seq_len() is safe for any row count
  # (unlike 1:nrow(), which would yield c(1, 0) for zero rows).
  formatted_refs <- character(nrow(bib_df))
  for (i in seq_len(nrow(bib_df))) {
    entry <- bib_df[i, ]
    ref_type <- tolower(safe_extract(entry$CATEGORY))
    # Extract common fields safely
    bibtexkey <- entry$BIBTEXKEY
    authors <- format_authors(entry$AUTHOR)
    title <- safe_extract(entry$TITLE)
    year <- safe_extract(entry$YEAR)
    doi <- safe_extract(entry$DOI)
    if (doi != "") doi <- paste0("https://doi.org/", doi)
    url <- safe_extract(entry$URL)
    # Format based on entry type; empty "=" cases fall through to the next
    # arm ("incollection" -> "inbook", "techreport" -> "manual").
    formatted_ref <- tryCatch({
      switch(ref_type,
        "article" = format_article(authors, title, year, entry, doi),
        "book" = format_book(authors, title, year, entry),
        "incollection" = ,
        "inbook" = format_chapter(authors, title, year, entry),
        "inproceedings" = format_proceedings(authors, title, year, entry, doi),
        "misc" = format_misc(authors, title, year, entry, url, doi),
        "techreport" = ,
        "manual" = format_software(authors, title, year, entry, doi),
        # Default format
        format_default(authors, title, year, entry, doi, url)
      )
    }, error = function(e) {
      # Fallback format so a single malformed entry cannot abort the run.
      paste0(authors, ", ", title, " (", year, ").")
    })
    formatted_refs[i] <- paste0("[", i, "] ", "[", bibtexkey, "] ", formatted_ref)
  }
  paste(formatted_refs, collapse = "\n\n")
}
# Load and format bibliography
# NOTE(review): "bibligraphy" is a typo but the name is kept as-is because
# the global may be referenced later in the document.
bibligraphy <- load_bibliography('./bibtex-information-fusion-document-classification.bib')
formatted_bibliography <- format_bibliography(bibligraphy)
# format_bibliography() returns a single collapsed string, so [1] is a no-op
# guard; cat() prints it with the embedded blank-line separators.
cat(formatted_bibliography[1])
## [1] [wangq2022] Q. Wang, Cross-domain structure preserving projection for heterogeneous domain adaptation, Pattern Recognition 123 (2022) 108362. https://doi.org/10.1016/j.patcog.2021.108362.
##
## [2] [zhao2023] F. Zhao, Topic identification of text-based expert stock comments using multi-level information fusion, Expert Systems 40 (2020). https://doi.org/10.1111/exsy.12641.
##
## [3] [cgoncalves2022] C. A. Gonçalves, A Novel Multi-View Ensemble Learning Architecture to Improve the Structured Text Classification, Information 13 (2022) 283. https://doi.org/10.3390/info13060283.
##
## [4] [reil2023] L. Rei, Multimodal metadata assignment for cultural heritage artifacts, Multimedia Systems 29 (2022) 847 – – 869. https://doi.org/10.1007/s00530-022-01025-2.
##
## [5] [debreuij2020] J. A. d. Bruijn, Improving the classification of flood tweets with contextual hydrological information in a multimodal neural network, Computers & Geosciences 140 (2020) 104485. https://doi.org/10.1016/j.cageo.2020.104485.
##
## [6] [liuw2021] W. Liu, Research on Multi-label Text Classification Method Based on tALBERT-CNN, International Journal of Computational Intelligence Systems 14 (2021) 201. https://doi.org/10.1007/s44196-021-00055-4.
##
## [7] [guod2023] D. Guo, A Comparative Study of Speaker Role Identification in Air Traffic Communication Using Deep Learning Approaches, ACM Transactions on Asian and Low-Resource Language Information Processing 22 (2023) 1 – – 17. https://doi.org/10.1145/3572792.
##
## [8] [chatziagapia2022] A. Chatziagapi, Audio and ASR-based Filled Pause Detection, in: 2022 10th International Conference on Affective Computing and Intelligent Interaction (ACII), IEEE, 2022, pp. 1 – – 7. https://doi.org/10.1109/acii55700.2022.9953889.
##
## [9] [andriyanovn2022] N. A. Andriyanov, Combining Text and Image Analysis Methods for Solving Multimodal Classification Problems, Pattern Recognition and Image Analysis 32 (2022) 489 – – 494. https://doi.org/10.1134/s1054661822030026.
##
## [10] [jiangs2024] S. Jiang, Deep Learning for Technical Document Classification, IEEE Transactions on Engineering Management 71 (2024) 1163 – – 1179. https://doi.org/10.1109/tem.2022.3152216.
##
## [11] [luox2022] X. Luo, Effective short text classification via the fusion of hybrid features for IoT social data, Digital Communications and Networks 8 (2022) 942 – – 954. https://doi.org/10.1016/j.dcan.2022.09.015.
##
## [12] [kanchid2022] S. Kanchi, EmmDocClassifier: Efficient Multimodal Document Image Classifier for Scarce Data, Applied Sciences 12 (2022) 1457. https://doi.org/10.3390/app12031457.
##
## [13] [adwaithd2022] D. Adwaith, Enhancing multimodal disaster tweet classification using state-of-the-art deep learning networks, Multimedia Tools and Applications 81 (2022) 18483 – – 18501. https://doi.org/10.1007/s11042-022-12217-3.
##
## [14] [liuj2022] J. Liu, Identifying Adverse Drug Reaction-Related Text from Social Media: A Multi-View Active Learning Approach with Various Document Representations, Information 13 (2022) 189. https://doi.org/10.3390/info13040189.
##
## [15] [sangy2022] Y. Sang, MBTI Personality Prediction for Fictional Characters Using Movie Scripts, in: Findings of the Association for Computational Linguistics: EMNLP 2022, Association for Computational Linguistics (ACL), 2022, pp. 6715 – – 6724. https://doi.org/10.18653/v1/2022.findings-emnlp.500.
##
## [16] [paraskevopoulos2022] G. Paraskevopoulos, Multimodal Classification of Safety-Report Observations, Applied Sciences 12 (2022) 5781. https://doi.org/10.3390/app12125781.
##
## [17] [sapeao2022] O. Sapena, Multimodal Classification of Teaching Activities from University Lecture Recordings, Applied Sciences 12 (2022) 4785. https://doi.org/10.3390/app12094785.
##
## [18] [arlqaraleshs2024] S. Alqaraleh, Multimodal Classifier for Disaster Response, Advanced Engineering, Technology and Applications, Springer Nature Switzerland, 2023, pp. 1 - - 13.
##
## [19] [karisanip2022] P. Karisani, Multi-View Active Learning for Short Text Classification in User-Generated Data, in: Findings of the Association for Computational Linguistics: EMNLP 2022, Association for Computational Linguistics (ACL), 2022, pp. 6441 – – 6453. https://doi.org/10.18653/v1/2022.findings-emnlp.481.
##
## [20] [yuet2022] T. Yue, PaperNet: A Dataset and Benchmark for Fine-Grained Paper Classification, Applied Sciences 12 (2022) 4554. https://doi.org/10.3390/app12094554.
##
## [21] [guq2022] Q. Gu, QiNiAn at SemEval-2022 Task 5: Multi-Modal Misogyny Detection and Classification, in: Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022), Association for Computational Linguistics (ACL), 2022, pp. 736 – – 741. https://doi.org/10.18653/v1/2022.semeval-1.102.
##
## [22] [dongpin2022] L. Dongping, Research on Deep Learning Model of Multimodal Heterogeneous Data Based on LSTM, IAENG International Journal of Computer Science 49 (2022).
##
## [23] [chenl2022] L. Chen, Utilizing Cross-Modal Contrastive Learning to Improve Item Categorization BERT Model, in: Proceedings of The Fifth Workshop on e-Commerce and NLP (ECNLP 5), Association for Computational Linguistics (ACL), 2022, pp. 217 – – 223. https://doi.org/10.18653/v1/2022.ecnlp-1.25.
##
## [24] [ortizperez2023] D. Ortiz-Perez, A Deep Learning-Based Multimodal Architecture to predict Signs of Dementia, Neurocomputing 548 (2023) 126413. https://doi.org/10.1016/j.neucom.2023.126413.
##
## [25] [chos2023] S. Cho, A Framework for Understanding Unstructured Financial Documents Using RPA and Multimodal Approach, Electronics 12 (2023) 939. https://doi.org/10.3390/electronics12040939.
##
## [26] [fujinumay2023] Y. Fujinuma, A Multi-Modal Multilingual Benchmark for Document Image Classification, in: Findings of the Association for Computational Linguistics: EMNLP 2023, Association for Computing Machinery (ACM), 2023, pp. 14361 – – 14376. https://doi.org/10.18653/v1/2023.findings-emnlp.958.
##
## [27] [shah2023] H. Shah, Building a Multimodal Classifier of Email Behavior: Towards a Social Network Understanding of Organizational Communication, Information 14 (2023) 661. https://doi.org/10.3390/info14120661.
##
## [28] [rasheeda2023] A. Rasheed, Cover-based multiple book genre recognition using an improved multimodal network, International Journal on Document Analysis and Recognition (IJDAR) 26 (2022) 65 – – 88. https://doi.org/10.1007/s10032-022-00413-8.
##
## [29] [liut2024] T. Liu, Cross-modal Multiple Granularity Interactive Fusion Network for Long Document Classification, ACM Transactions on Knowledge Discovery from Data 18 (2024) 1 – – 24. https://doi.org/10.1145/3631711.
##
## [30] [samya2023] A. E. Samy, Data-Driven Self-Supervised Graph Representation Learning, ECAI 2023, IOS Press, 2023, pp. 629 - 636.
##
## [31] [jarrahia2023] A. Jarrahi, Evaluating the effectiveness of publishers features in fake news detection on social media, Multimedia Tools and Applications 82 (2022) 2913 – – 2939. https://doi.org/10.1007/s11042-022-12668-8.
##
## [32] [liangz2023] Z. Liang, Fake News Detection Based on Multimodal Inputs, Computers, Materials & Continua 75 (2023) 4519 – – 4534. https://doi.org/10.32604/cmc.2023.037035.
##
## [33] [kozienkop2023] P. Kazienko, Human-centered neural reasoning for subjective content processing: Hate speech, emotions, and humor, Information Fusion 94 (2023) 43 – – 65. https://doi.org/10.1016/j.inffus.2023.01.010.
##
## [34] [graffm2023] M. Graff, Ingeotec at DA-VINCIS: Bag-of-Words Classifiers, in: , CEUR-WS, 2023.
##
## [35] [varmanp2023] P. K. Verma, MCred: multi-modal message credibility for fake news detection using BERT and CNN, Journal of Ambient Intelligence and Humanized Computing 14 (2022) 10617 – – 10629. https://doi.org/10.1007/s12652-022-04338-2.
##
## [36] [tianl2023] L. Tian, MetaTroll: Few-shot Detection of State-Sponsored Trolls with Transformer Adapters, in: Proceedings of the ACM Web Conference 2023, Association for Computing Machinery (ACM), 2023, pp. 1743 – 1753. https://doi.org/10.1145/3543507.3583417.
##
## [37] [kenny2023] Kenny, MULTIMODAL APPROACH FOR EMOTION RECOGNITION USING FEATURE FUSION, ICIC Express Letters 17 (2023) 181 – 189. https://doi.org/10.24507/icicel.17.02.181.
##
## [38] [linckere2023] E. Lincker, Noisy and Unbalanced Multimodal Document Classification: Textbook Exercises as a Use Case, in: 20th International Conference on Content-based Multimedia Indexing, Association for Computing Machinery (ACM), 2023, pp. 71 – – 78. https://doi.org/10.1145/3617233.3617239.
##
## [39] [jarquin2023] H. Jarquín-Vásquez, Overview of DA-VINCIS at IberLEF 2023: Detection of Aggressive and Violent Incidents from Social Media in Spanish, Procesamiento del Lenguaje Natural (2023) 351–360. https://doi.org/10.26342/2023-71-27.
##
## [40] [luzdearau2023] P. H. Luz de Araujo, Sequence-aware multimodal page classification of Brazilian legal documents, International Journal on Document Analysis and Recognition (IJDAR) 26 (2022) 33 – – 49. https://doi.org/10.1007/s10032-022-00406-7.
##
## [41] [chenz2023] Z. Chen, Towards Unifying Medical Vision-and-Language Pre-training via Soft Prompts, in: 2023 IEEE/CVF International Conference on Computer Vision (ICCV), IEEE, 2023, pp. 23346 – – 23356. https://doi.org/10.1109/iccv51070.2023.02139.
##
## [42] [zouh2023] H. Zou, UniS-MMC: Multimodal Classification via Unimodality-supervised Multimodal Contrastive Learning, in: Findings of the Association for Computational Linguistics: ACL 2023, Association for Computational Linguistics (ACL), 2023, pp. 659 – 672. https://doi.org/10.18653/v1/2023.findings-acl.41.
##
## [43] [bakkalis2023] S. Bakkali, VLCDoC: Vision-Language contrastive pre-training model for cross-Modal document classification, Pattern Recognition 139 (2023) 109419. https://doi.org/10.1016/j.patcog.2023.109419.
##
## [44] [wajdm2024] M. A. Wajid, A deep learning approach for image and text classification using neutrosophy, International Journal of Information Technology 16 (2023) 853 – – 859. https://doi.org/10.1007/s41870-023-01529-8.
##
## [45] [fengz2024] Z. Feng, Adaptive micro- and macro-knowledge incorporation for hierarchical text classification, Expert Systems with Applications 248 (2024) 123374. https://doi.org/10.1016/j.eswa.2024.123374.
##
## [46] [xuy2024] Y. Xu, An effective multi-modal adaptive contextual feature information fusion method for Chinese long text classification, Artificial Intelligence Review 57 (2024) 1 – 29. https://doi.org/10.1007/s10462-024-10835-x.
##
## [47] [jiz2024] Z. Ji, ASSL-HGAT: Active semi-supervised learning empowered heterogeneous graph attention network, Knowledge-Based Systems 290 (2024) 111567. https://doi.org/10.1016/j.knosys.2024.111567.
##
## [48] [ghorbanali2024] A. Ghorbanali, Capsule network-based deep ensemble transfer learning for multimodal sentiment analysis, Expert Systems with Applications 239 (2024) 122454. https://doi.org/10.1016/j.eswa.2023.122454.
##
## [49] [yushili2024] Y. Li, Compact bilinear pooling and multi-loss network for social media multimodal classification, Signal, Image and Video Processing 18 (2024) 8403 – – 8412. https://doi.org/10.1007/s11760-024-03482-w.
##
## [50] [zhangqi2024] Z. Jiang, Deep Incomplete Multi-View Learning Network with Insufficient Label Information, Proceedings of the AAAI Conference on Artificial Intelligence 38 (2024) 12919 – – 12927. https://doi.org/10.1609/aaai.v38i11.29189.
##
## [51] [xianfang2024] X. Song, Evolutionary computation for feature selection in classification: A comprehensive survey of solutions, applications and challenges, Swarm and Evolutionary Computation 90 (2024) 101661. https://doi.org/10.1016/j.swevo.2024.101661.
##
## [52] [zhangy2024] Y. Zhang, Image Information Prompt: Tips for Learning Large Language Models, Intelligent Computing Technology and Automation, IOS Press, 2024, pp. undefined - undefined.
##
## [53] [liub2024] B. Liu, MinJoT: Multimodal infusion Joint Training for noise learning in text and multimodal classification problems, Information Fusion 102 (2024) 102071. https://doi.org/10.1016/j.inffus.2023.102071.
##
## [54] [tengfeil2024] T. Liu, Multi-modal long document classification based on Hierarchical Prompt and Multi-modal Transformer, Neural Networks 176 (2024) 106322. https://doi.org/10.1016/j.neunet.2024.106322.
##
## [55] [ronghaop2024] R. Pan, Spanish MEACorpus 2023: A multimodal speechtext corpus for emotion analysis in Spanish from natural environments, Computer Standards & Interfaces 90 (2024) 103856. https://doi.org/10.1016/j.csi.2024.103856.
##
## [56] [dasigiv2001] V. Dasigi, Information fusion for text classification an experimental comparison, Pattern Recognition 34 (2001) 2413 – – 2425. https://doi.org/10.1016/s0031-3203(00)00171-0.
##
## [57] [matsubara2005] E. T. Matsubara, Multi-view semi-supervised learning: An approach to obtain different views from text datasets, in: , IOS Press BV, 2005, pp. 97 – 104.
##
## [58] [zhangb2008] B. Zhang, Co-EM Support Vector Machine Based Text Classification from Positive and Unlabeled Examples, in: 2008 First International Conference on Intelligent Networks and Intelligent Systems, IEEE, 2008, pp. 745 – – 748. https://doi.org/10.1109/icinis.2008.29.
##
## [59] [suns2008] S. Sun, Semantic Features for Multi-view Semi-supervised and Active Learning of Text Classification, in: 2008 IEEE International Conference on Data Mining Workshops, IEEE, 2008, pp. 731 – – 735. https://doi.org/10.1109/icdmw.2008.13.
##
## [60] [gup2009] P. Gu, A multi-view approach to semi-supervised document classification with incremental Naive Bayes, Computers & Mathematics with Applications 57 (2009) 1030 – – 1036. https://doi.org/10.1016/j.camwa.2008.10.025.
##
## [61] [zhangx2009] X. Zhang, Batch Mode Active Learning Based Multi-view Text Classification, in: 2009 Sixth International Conference on Fuzzy Systems and Knowledge Discovery, IEEE, 2009, pp. 472 – – 476. https://doi.org/10.1109/fskd.2009.495.
##
## [62] [chenb2009] B. Chen, Document Classification with One-class Multiview Learning, in: 2009 International Conference on Industrial and Information Systems, IEEE, 2009, pp. 289 – – 292. https://doi.org/10.1109/iis.2009.15.
##
## [63] [aminim2009] M. Amini, Learning from Multiple Partially Observed Views - an Application to Multilingual Text Categorization, in: , Neural Information Processing Systems, 2009, pp. 28 – 36.
##
## [64] [chens2009] S. D. Chen, Meta-classifiers for multimodal document classification, in: 2009 IEEE International Workshop on Multimedia Signal Processing, IEEE, 2009, pp. 1 – – 6. https://doi.org/10.1109/mmsp.2009.5293343.
##
## [65] [aminim2010] M. Amini, A co-classification approach to learning from multilingual corpora, Machine Learning 79 (2009) 105–121. https://doi.org/10.1007/s10994-009-5151-5.
##
## [66] [zhangx2010b] X. Zhang, A general decision layer text classification fusion model, in: 2010 2nd International Conference on Education Technology and Computer, IEEE, 2010, pp. V5 – 239 – V5 – 241. https://doi.org/10.1109/icetc.2010.5529774.
##
## [67] [suns2010] S. Sun, Active learning with extremely sparse labeled examples, Neurocomputing 73 (2010) 2980 – – 2988. https://doi.org/10.1016/j.neucom.2010.07.007.
##
## [68] [aminim2010b] M. R. Amini, Combining coregularization and consensus-based self-training for multilingual text categorization, in: Proceedings of the 33rd international ACM SIGIR conference on Research and development in information retrieval, ACM, 2010, pp. 475 – – 482. https://doi.org/10.1145/1835449.1835529.
##
## [69] [perezgraciat2010] T. Pérez-García, Harmonic and instrumental information fusion for musical genre classification, in: Proceedings of 3rd international workshop on Machine learning and music, ACM, 2010, pp. 49–52. https://doi.org/10.1145/1878003.1878020.
##
## [70] [zhangx2010] X. Zhang, Study on Multi-layer Fusion Classification Model of Multi-media Information, in: 2010 International Conference on Web Information Systems and Mining, IEEE, 2010, pp. 216 – – 218. https://doi.org/10.1109/wism.2010.126.
##
## [71] [zhengw2011] W. Zheng, Dimensionality Reduction with Category Information Fusion and Non-negative Matrix Factorization for Text Categorization, Artificial Intelligence and Computational Intelligence, Springer Berlin Heidelberg, 2011, pp. 505 - - 512.
##
## [72] [guyo2012] Y. Guo, Cross Language Text Classification via Subspace Co-regularized Multi-view Learning, in: , 2012, pp. 1615 – 1622. https://doi.org/10.48550/arxiv.1206.6481.
##
## [73] [kovesim2012] M. Kovesi, Fast on-line learning for multilingual categorization, in: Proceedings of the 35th international ACM SIGIR conference on Research and development in information retrieval, ACM, 2012, pp. 1071 – – 1072. https://doi.org/10.1145/2348283.2348474.
##
## [74] [yangp2012] P. Yang, Information-theoretic Multi-view Domain Adaptation, in: , 2012, pp. 270 – 274.
##
## [75] [lig2012] G. Li, Multiview Semi-Supervised Learning with Consensus, IEEE Transactions on Knowledge and Data Engineering 24 (2012) 2040 – – 2051. https://doi.org/10.1109/tkde.2011.160.
##
## [76] [zhangb2013] B. Zhang, Classification of big velocity data via cross-domain Canonical Correlation Analysis, in: 2013 IEEE International Conference on Big Data, IEEE, 2013, pp. 493 – – 498. https://doi.org/10.1109/bigdata.2013.6691612.
##
## [77] [liy2013] Y. LI, Combination of multiple feature selection methods for text categorization by using combinatorial fusion analysis and rank-score characteristic, International Journal on Artificial Intelligence Tools 22 (2013) 1350001. https://doi.org/10.1142/s0218213013500012.
##
## [78] [perinaa2013] A. Perina, Documents as multiple overlapping windows into grids of counts, in: , Neural information processing systems foundation, 2013.
##
## [79] [longg2013] G. Long, Graph Based Feature Augmentation for Short and Sparse Text Classification, Advanced Data Mining and Applications, Springer Berlin Heidelberg, 2013, pp. 456 - - 467.
##
## [80] [zhangd2013] D. Zhang, MI2LS: multi-instance learning from multiple informationsources, in: Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining, Association for Computing Machinery (ACM), 2013, pp. 149 – – 157. https://doi.org/10.1145/2487575.2487651.
##
## [81] [cristanim2014] M. Cristani, A Multimodal Approach to Exploit Similarity in Documents, Modern Advances in Applied Intelligence, Springer International Publishing, 2014, pp. 490 - - 499.
##
## [82] [liuj2014] J. Liu, An Embedded Co-AdaBoost based construction of software document relation coupled resource spaces for cyberphysical society, Future Generation Computer Systems 32 (2014) 198 – – 210. https://doi.org/10.1016/j.future.2012.12.017.
##
## [83] [yangp2014] P. Yang, Information-Theoretic Multi-view Domain Adaptation: A Theoretical and Empirical Study, Journal of Artificial Intelligence Research 49 (2014) 501 – – 525. https://doi.org/10.1613/jair.4190.
##
## [84] [liparas2014] D. Liparas, News Articles Classification Using Random Forests and Weighted Multimodal Features, Multidisciplinary Information Retrieval, Springer International Publishing, 2014, pp. 63 - - 75.
##
## [85] [liaox2015] X. L. Liao, A Multi-topic Meta-classification Scheme for Analyzing Lobbying Disclosure Data, in: 2015 IEEE International Conference on Information Reuse and Integration, IEEE, 2015, pp. 349 – – 356. https://doi.org/10.1109/iri.2015.60.
##
## [86] [brefeldu2015] U. Brefeld, Multi-view learning with dependent views, in: Proceedings of the 30th Annual ACM Symposium on Applied Computing, Association for Computing Machinery (ACM), 2015, pp. 865 – – 870. https://doi.org/10.1145/2695664.2695829.
##
## [87] [fakri2015] A. Fakeri-Tabrizi, Multiview self-learning, Neurocomputing 155 (2015) 117 – – 127. https://doi.org/10.1016/j.neucom.2014.12.041.
##
## [88] [iglesias2016] E. L. Iglesias, An HMM-Based Multi-view Co-training Framework for Single-View Text Corpora, Hybrid Artificial Intelligent Systems, Springer International Publishing, 2016, pp. 66 - - 78.
##
## [89] [rajendran2016] J. Rajendran, Bridge Correlational Neural Networks for Multilingual Multimodal Representation Learning, in: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Association for Computing Machinery (ACM), 2016, pp. 171 – – 181. https://doi.org/10.18653/v1/n16-1021.
##
## [90] [xux2016] X. Xu, Co-Labeling for Multi-View Weakly Labeled Learning, IEEE Transactions on Pattern Analysis and Machine Intelligence 38 (2016) 1113 – – 1125. https://doi.org/10.1109/tpami.2015.2476813.
##
## [91] [sinorar2016] R. A. Sinoara, Semantic role-based representations in text classification, in: 2016 23rd International Conference on Pattern Recognition (ICPR), IEEE, 2016, pp. 2313 – – 2318. https://doi.org/10.1109/icpr.2016.7899981.
##
## [92] [xuh2016] H. Xu, Text Classification with Topic-based Word Embedding and Convolutional Neural Networks, in: Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics, Association for Computing Machinery (ACM), 2016, pp. 88 – – 97. https://doi.org/10.1145/2975167.2975176.
##
## [93] [huz2017] Z. Hu, A deep learning approach for predicting the quality of online health expert question-answering services, Journal of Biomedical Informatics 71 (2017) 241 – – 253. https://doi.org/10.1016/j.jbi.2017.06.012.
##
## [94] [akhtiamovo2017] O. Akhtiamov, Are You Addressing Me? Multimodal Addressee Detection in Human-Human-Computer Conversations, Speech and Computer, Springer International Publishing, 2017, pp. 152 - - 161.
##
## [95] [zhanz2017] Z. Zhan, Document Analysis Based on Multi-view Intact Space Learning with Manifold Regularization, Intelligence Science and Big Data Engineering, Springer International Publishing, 2017, pp. 41 - - 51.
##
## [96] [schmittm2017] M. Schmitt, openXBOW - Introducing the Passau Open-Source Crossmodal Bag-of-Words Toolkit, Journal of Machine Learning Research 18 (2017) 1 – 5. https://doi.org/10.48550/arxiv.1605.06778.
##
## [97] [xuc2017] C. Xu, Multimodal Fusion with Global and Local Features for Text Classification, Neural Information Processing, Springer International Publishing, 2017, pp. 124 - - 134.
##
## [98] [pengj2018] J. Peng, Multiview Boosting With Information Propagation for Classification, IEEE Transactions on Neural Networks and Learning Systems 29 (2018) 657 – – 669. https://doi.org/10.1109/tnnls.2016.2637881.
##
## [99] [argon2018] M. E. Aragón, A Straightforward Multimodal Approach for Author Profiling: Notebook for PAN at CLEF 2018, CEUR Workshop Proceedings 2125 (2018).
##
## [100] [ferreira2018] C. H. P. Ferreira, Combining Multiple Views from a Distance Based Feature Extraction for Text Classification, in: 2018 IEEE Congress on Evolutionary Computation (CEC), IEEE, 2018, pp. 1 – – 8. https://doi.org/10.1109/cec.2018.8477772.
##
## [101] [guptad2018] D. Gupta, Empowering First Responders through Automated Multimodal Content Moderation, in: 2018 IEEE International Conference on Cognitive Computing (ICCC), IEEE, 2018, pp. 1 – – 8. https://doi.org/10.1109/iccc.2018.00008.
##
## [102] [akhiamov2018] O. Akhtiamov, Gaze, Prosody and Semantics: Relevance of Various Multimodal Signals to Addressee Detection in Human-Human-Computer Conversations, Speech and Computer, Springer International Publishing, 2018, pp. 1 - - 10.
##
## [103] [tellez2018] E. S. Tellez, Gender identification through multi-modal Tweet analysis using MicroTC and Bag of Visual Words: Notebook for PAN at CLEF 2018, in: , CEUR-WS, 2018.
##
## [104] [matricm2018] M. Martinc, Multilingual gender classification with multi-view deep learning notebook for PAN at CLEF 2018, in: , CEUR-WS, 2018.
##
## [105] [zhup2018] P. Zhu, Multi-view label embedding, Pattern Recognition 84 (2018) 126 – – 135. https://doi.org/10.1016/j.patcog.2018.07.009.
##
## [106] [mmironczuk2020] M. M. Mironczuk, Recognising innovative companies by using a diversified stacked generalisation method for website classification, Applied Intelligence 50 (2019) 42 – – 60. https://doi.org/10.1007/s10489-019-01509-1.
##
## [107] [anget2018] T. Ange, Semi-Supervised Multimodal Deep Learning Model for Polarity Detection in Arguments, in: 2018 International Joint Conference on Neural Networks (IJCNN), IEEE, 2018, pp. 1 – – 8. https://doi.org/10.1109/ijcnn.2018.8489342.
##
## [108] [akhtiamov2019] O. Akhtiamov, A Comparative Study of Classical and Deep Classifiers for Textual Addressee Detection in Human-Human-Machine Conversations, Speech and Computer, Springer International Publishing, 2019, pp. 20 - - 30.
##
## [109] [hoylea2019] A. M. Hoyle, Combining Sentiment Lexica with a Multi-View Variational Autoencoder, in: Proceedings of the 2019 Conference of the North, Association for Computational Linguistics (ACL), 2019, pp. 635 – – 640. https://doi.org/10.18653/v1/n19-1065.
##
## [110] [wangh2019] H. Wang, Co-regularized multi-view sparse reconstruction embedding for dimension reduction, Neurocomputing 347 (2019) 191 – – 199. https://doi.org/10.1016/j.neucom.2019.03.080.
##
## [111] [chens2019] S. Chen, Deep Learning Method with Attention for Extreme Multi-label Text Classification, PRICAI 2019: Trends in Artificial Intelligence, Springer International Publishing, 2019, pp. 179 - - 190.
##
## [112] [mmironczuk2019] M. M. Mironczuk, Empirical evaluation of feature projection algorithms for multi-view text classification, Expert Systems with Applications 130 (2019) 97 – – 112. https://doi.org/10.1016/j.eswa.2019.04.020.
##
## [113] [ravikiranm2019] M. Ravikiran, Fusing Deep Quick Response Code Representations Improves Malware Text Classification, in: Proceedings of the ACM Workshop on Crossmodal Learning and Application, Association for Computing Machinery (ACM), 2019, pp. 11 – – 18. https://doi.org/10.1145/3326459.3329166.
##
## [114] [jainr2019] R. Jain, Multimodal Document Image Classification, in: 2019 International Conference on Document Analysis and Recognition (ICDAR), IEEE, 2019, pp. 71 – – 77. https://doi.org/10.1109/icdar.2019.00021.
##
## [115] [wangh2020] H. Wang, Multi-view reconstructive preserving embedding for dimension reduction, Soft Computing 24 (2019) 7769 – – 7780. https://doi.org/10.1007/s00500-019-04395-4.
##
## [116] [hey2019] Y. He, Multi-view transfer learning with privileged learning framework, Neurocomputing 335 (2019) 131 – – 142. https://doi.org/10.1016/j.neucom.2019.01.019.
##
## [117] [bhatt2019] G. Bhatt, Representation learning using step-based deep multi-modal autoencoders, Pattern Recognition 95 (2019) 12 – – 23. https://doi.org/10.1016/j.patcog.2019.05.032.
##
## [118] [zhu2020] W. Zhu, A Synchronized Word Representation Method With Dual Perceptual Information, IEEE Access 8 (2020) 22335 – – 22344. https://doi.org/10.1109/access.2020.2969983.
##
## [119] [carmona2020] M. Á. Álvarez Carmona, Author Profiling in Social Media with Multimodal Information, Computación y Sistemas 24 (2020) 1289–1304. https://doi.org/10.13053/cys-24-3-3488.
##
## [120] [doinychko2020] A. Doinychko, Biconditional Generative Adversarial Networks for Multiview Learning with Missing Views, Advances in Information Retrieval, Springer International Publishing, 2020, pp. 807 - - 820.
##
## [121] [hessel2020] J. Hessel, Does my multimodal model learn cross-modal interactions? It's harder to tell than you might think!, in: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), Association for Computational Linguistics (ACL), 2020, pp. 861–877. https://doi.org/10.18653/v1/2020.emnlp-main.62.
##
## [122] [braz2020] L. Braz, ImTeNet: Image-Text Classification Network for Abnormality Detection and Automatic Reporting on Musculoskeletal Radiographs, Advances in Bioinformatics and Computational Biology, Springer International Publishing, 2020, pp. 150 - - 161.
##
## [123] [dacosta2022] T. A. G. d. Costa, Providing a greater precision of Situational Awareness of urban floods through Multimodal Fusion, Expert Systems with Applications 188 (2022) 115923. https://doi.org/10.1016/j.eswa.2021.115923.
##
## [124] [garg2021] S. Garg, On-Device Document Classification using multimodal features, in: Proceedings of the 3rd ACM India Joint International Conference on Data Science & Management of Data (8th ACM IKDD CODS & 26th COMAD), ACM, 2021, pp. 203 – – 207. https://doi.org/10.1145/3430984.3431030.
##
## [125] [huc2021] C. Hu, One-class Text Classification with Multi-modal Deep Support Vector Data Description, in: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, Association for Computational Linguistics, 2021, pp. 3378 – – 3390. https://doi.org/10.18653/v1/2021.eacl-main.296.
##
## [126] [gui2021] M. Gui, Technology Forecasting Using Deep Learning Neural Network: Taking the Case of Robotics, IEEE Access 9 (2021) 53306 – – 53316. https://doi.org/10.1109/access.2021.3070105.
##
## [127] [gallo2021] I. Gallo, Visual Word Embedding for Text Classification, Pattern Recognition. ICPR International Workshops and Challenges, Springer International Publishing, 2021, pp. 339 - - 352.
##
## [128] [setiawan2021] E. Setiawan, Multiview Sentiment Analysis with Image-Text-Concept Features of Indonesian Social Media Posts, International Journal of Intelligent Engineering and Systems 14 (2021) 521 – – 535. https://doi.org/10.22266/ijies2021.0430.47.
##
## [129] [jiax2021] X. Jia, Semi-Supervised Multi-View Deep Discriminant Representation Learning, IEEE Transactions on Pattern Analysis and Machine Intelligence 43 (2021) 2496 – – 2509. https://doi.org/10.1109/tpami.2020.2973634.
##
## [130] [sus2021] S. Su, A Dynamic Discriminative Canonical Correlation Analysis via Adaptive Weight Scheme, IEEE Access 9 (2021) 142653 – – 142663. https://doi.org/10.1109/access.2021.3118023.
##
## [131] [guelorget2021] P. Guélorget, Active learning to measure opinion and violence in French newspapers, Procedia Computer Science 192 (2021) 202–211. https://doi.org/10.1016/j.procs.2021.08.021.
##
## [132] [liang2021] Y. Liang, Fusion of heterogeneous attention mechanisms in multi-view convolutional neural network for text classification, Information Sciences 548 (2021) 295 – – 312. https://doi.org/10.1016/j.ins.2020.10.021.
##
## [133] [wang2021] X. Wang, Implicit Emotion Relationship Mining Based on Optimal and Majority Synthesis From Multimodal Data Prediction, IEEE MultiMedia 28 (2021) 96 – – 105. https://doi.org/10.1109/mmul.2021.3071495.
##
## [134] [zhang2021] Y. Zhang, Learning sentiment sentence representation with multiview attention model, Information Sciences 571 (2021) 459 – – 474. https://doi.org/10.1016/j.ins.2021.05.044.
##
## [135] [zingaro2021] S. P. Zingaro, Multimodal Side- Tuning for Document Classification, in: 2020 25th International Conference on Pattern Recognition (ICPR), IEEE, 2021, pp. 5206 – – 5213. https://doi.org/10.1109/icpr48806.2021.9413208.
##
## [136] [max2020] X. Ma, Particle Filter Recurrent Neural Networks, in: , Association for the Advancement of Artificial Intelligence (AAAI), 2020, pp. 5101 – – 5108. https://doi.org/10.1609/aaai.v34i04.5952.
##
## [137] [maf2020] F. Ma, Self-paced multi-view co-training, Journal of Machine Learning Research 21 (2020).
##
## [138] [lij2020] J. Li, TextShield: Robust Text Classification Based on Multimodal Embedding and Neural Machine Translation, in: , 2020.
##
## [139] [ma2021] C. Ma, On the (In)Effectiveness of Images for Text Classification, in: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, Association for Computational Linguistics, 2021, pp. 42 – – 48. https://doi.org/10.18653/v1/2021.eacl-main.4.